/*
 * $HeadURL: https://svn.apache.org/repos/asf/jakarta/httpcomponents/oac.hc3x/tags/HTTPCLIENT_3_1/src/java/org/apache/commons/httpclient/URI.java $
 * $Revision: 564973 $
 * $Date: 2007-08-11 22:51:47 +0200 (Sat, 11 Aug 2007) $
 *
 * ====================================================================
 *
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.commons.httpclient;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Locale;
import java.util.BitSet;
import java.util.Hashtable;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.httpclient.util.EncodingUtil;

/**
 * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
 * This class has the purpose of supporting the parsing of a URI reference to
 * extend any specific protocols, the character encoding of the protocol to be
 * transported and the charset of the document.
 * <p>
 * A URI is always in an "escaped" form, since escaping or unescaping a
 * completed URI might change its semantics.
 * <p>
 * Implementers should be careful not to escape or unescape the same string more
 * than once, since unescaping an already unescaped string might lead to
 * misinterpreting a percent data character as another escaped character, or
 * vice versa in the case of escaping an already escaped string.
 * <p>
 * In order to avoid these problems, data types are used as follows:
 * <p>
 * <blockquote>
 * 
 * <pre>
 *   URI character sequence: char
 *   octet sequence: byte
 *   original character sequence: String
 * </pre>
 * 
 * </blockquote>
 * <p>
 * 
 * So, a URI is a sequence of characters as an array of a char type, which is
 * not always represented as a sequence of octets as an array of byte.
 * <p>
 * 
 * URI Syntactic Components
 * <p>
 * <blockquote>
 * 
 * <pre>
 * - In general, written as follows:
 *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
 *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
 * 
 * - Syntax
 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 *   hier_part     = ( net_path | abs_path ) [ "?" query ]
 *   net_path      = "//" authority [ abs_path ]
 *   abs_path      = "/"  path_segments
 * </pre>
 * 
 * </blockquote>
 * <p>
 * 
 * The following examples illustrate URI that are in common use.
 * 
 * <pre>
 * ftp://ftp.is.co.za/rfc/rfc1808.txt
 *    -- ftp scheme for File Transfer Protocol services
 * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
 *    -- gopher scheme for Gopher and Gopher+ Protocol services
 * http://www.math.uio.no/faq/compression-faq/part1.html
 *    -- http scheme for Hypertext Transfer Protocol services
 * mailto:mduerst@ifi.unizh.ch
 *    -- mailto scheme for electronic mail addresses
 * news:comp.infosystems.www.servers.unix
 *    -- news scheme for USENET news groups and articles
 * telnet://melvyl.ucop.edu/
 *    -- telnet scheme for interactive services via the TELNET Protocol
 * </pre>
 * 
 * Please, notice that there are many modifications from URL(RFC 1738) and
 * relative URL(RFC 1808).
 * <p>
 * <b>The expressions for a URI</b>
 * <p>
 * 
 * <pre>
 * For escaped URI forms
 *  - URI(char[]) // constructor
 *  - char[] getRawXxx() // method
 *  - String getEscapedXxx() // method
 *  - String toString() // method
 * <p>
 * For unescaped URI forms
 *  - URI(String) // constructor
 *  - String getXXX() // method
 * </pre>
 * <p>
 * 
 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
 * @version $Revision: 564973 $ $Date: 2002/03/14 15:14:01
 */
public class URI implements Cloneable, Comparable, Serializable {

	// ----------------------------------------------------------- Constructors

	/**
	 * Creates an empty instance for internal use. Nothing is parsed; every
	 * component keeps the default value given by its field initializer.
	 */
	protected URI() {
	}

	/**
	 * Construct a URI from a string with the given charset. The input string
	 * can be either in escaped or unescaped form.
	 * 
	 * @param s
	 *            URI character sequence
	 * @param escaped
	 *            <tt>true</tt> if URI character sequence is in escaped form.
	 *            <tt>false</tt> otherwise.
	 * @param charset
	 *            the charset string to do escape encoding, if required
	 * 
	 * @throws URIException
	 *             If the URI cannot be created.
	 * @throws NullPointerException
	 *             if input string is <code>null</code>
	 * 
	 * @see #getProtocolCharset
	 * 
	 * @since 3.0
	 */
	public URI(String s, boolean escaped, String charset) throws URIException,
			NullPointerException {
		// remember the protocol charset for this instance, then parse the
		// given reference
		this.protocolCharset = charset;
		this.parseUriReference(s, escaped);
	}

	/**
	 * Construct a URI from a string with the given charset. The input string
	 * can be either in escaped or unescaped form.
	 * 
	 * @param s
	 *            URI character sequence
	 * @param escaped
	 *            <tt>true</tt> if URI character sequence is in escaped form.
	 *            <tt>false</tt> otherwise.
	 * 
	 * @throws URIException
	 *             If the URI cannot be created.
	 * @throws NullPointerException
	 *             if input string is <code>null</code>
	 * 
	 * @see #getProtocolCharset
	 * 
	 * @since 3.0
	 */
	public URI(String s, boolean escaped) throws URIException,
			NullPointerException {
		// no charset is recorded here; the reference is parsed as given
		this.parseUriReference(s, escaped);
	}

	/**
	 * Construct a URI as an escaped form of a character array with the given
	 * charset.
	 * 
	 * @param escaped
	 *            the URI character sequence
	 * @param charset
	 *            the charset string to do escape encoding
	 * @throws URIException
	 *             If the URI cannot be created.
	 * @throws NullPointerException
	 *             if <code>escaped</code> is <code>null</code>
	 * @see #getProtocolCharset
	 * 
	 * @deprecated Use #URI(String, boolean, String)
	 */
	public URI(char[] escaped, String charset) throws URIException,
			NullPointerException {
		protocolCharset = charset;
		parseUriReference(new String(escaped), true);
	}

	/**
	 * Construct a URI as an escaped form of a character array. An URI can be
	 * placed within double-quotes or angle brackets like "http://test.com/" and
	 * &lt;http://test.com/&gt;
	 * 
	 * @param escaped
	 *            the URI character sequence
	 * @throws URIException
	 *             If the URI cannot be created.
	 * @throws NullPointerException
	 *             if <code>escaped</code> is <code>null</code>
	 * @see #getDefaultProtocolCharset
	 * 
	 * @deprecated Use #URI(String, boolean)
	 */
	public URI(char[] escaped) throws URIException, NullPointerException {
		parseUriReference(new String(escaped), true);
	}

	/**
	 * Construct a URI from the given string with the given charset.
	 * 
	 * @param original
	 *            the string to be represented to URI character sequence It is
	 *            one of absoluteURI and relativeURI.
	 * @param charset
	 *            the charset string to do escape encoding
	 * @throws URIException
	 *             If the URI cannot be created.
	 * @see #getProtocolCharset
	 * 
	 * @deprecated Use #URI(String, boolean, String)
	 */
	public URI(String original, String charset) throws URIException {
		protocolCharset = charset;
		parseUriReference(original, false);
	}

	/**
	 * Construct a URI from the given string.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * An URI can be placed within double-quotes or angle brackets like
	 * "http://test.com/" and &lt;http://test.com/&gt;
	 * 
	 * @param original
	 *            the string to be represented to URI character sequence It is
	 *            one of absoluteURI and relativeURI.
	 * @throws URIException
	 *             If the URI cannot be created.
	 * @see #getDefaultProtocolCharset
	 * 
	 * @deprecated Use #URI(String, boolean)
	 */
	public URI(String original) throws URIException {
		parseUriReference(original, false);
	}

	/**
	 * Construct a general URI from the given components.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
	 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
	 *   opaque_part   = uric_no_slash *uric
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
	 * &lt;fragment&gt;.
	 * 
	 * @param scheme
	 *            the scheme string
	 * @param schemeSpecificPart
	 *            scheme_specific_part
	 * @param fragment
	 *            the fragment string
	 * @throws URIException
	 *             If the URI cannot be created.
	 * @see #getDefaultProtocolCharset
	 */
	public URI(String scheme, String schemeSpecificPart, String fragment)
			throws URIException {

		// validate and construct the URI character sequence
		if (scheme == null) {
			throw new URIException(URIException.PARSING, "scheme required");
		}
		// Lower-case with a fixed locale. The default-locale variant maps 'I'
		// to a dotless i under e.g. a Turkish default locale, which would
		// corrupt a plain ASCII scheme such as "FILE".
		char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
		if (!validate(s, URI.scheme)) {
			throw new URIException(URIException.PARSING, "incorrect scheme");
		}
		_scheme = s; // is_absoluteURI

		// the scheme-specific part is escaped against the opaque_part set
		_opaque = encode(schemeSpecificPart, allowed_opaque_part,
				getProtocolCharset());
		// Set flag
		_is_opaque_part = true;
		// the fragment is stored as given, without escaping
		_fragment = fragment == null ? null : fragment.toCharArray();
		setURI();
	}

	/**
	 * Construct a general URI from the given components.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
	 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
	 *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
	 *   hier_part     = ( net_path | abs_path ) [ "?" query ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
	 * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
	 * &gt;.
	 * 
	 * @param scheme
	 *            the scheme string
	 * @param authority
	 *            the authority string
	 * @param path
	 *            the path string
	 * @param query
	 *            the query string
	 * @param fragment
	 *            the fragment string
	 * @throws URIException
	 *             If the new URI cannot be created.
	 * @see #getDefaultProtocolCharset
	 */
	public URI(String scheme, String authority, String path, String query,
			String fragment) throws URIException {

		// validate and contruct the URI character sequence
		StringBuffer buff = new StringBuffer();
		if (scheme != null) {
			buff.append(scheme);
			buff.append(':');
		}
		if (authority != null) {
			buff.append("//");
			buff.append(authority);
		}
		if (path != null) { // accept empty path
			if ((scheme != null || authority != null) && !path.startsWith("/")) {
				throw new URIException(URIException.PARSING,
						"abs_path requested");
			}
			buff.append(path);
		}
		if (query != null) {
			buff.append('?');
			buff.append(query);
		}
		if (fragment != null) {
			buff.append('#');
			buff.append(fragment);
		}
		parseUriReference(buff.toString(), false);
	}

	/**
	 * Construct a general URI from the given components.
	 * 
	 * @param scheme
	 *            the scheme string
	 * @param userinfo
	 *            the userinfo string
	 * @param host
	 *            the host string
	 * @param port
	 *            the port number
	 * @throws URIException
	 *             If the new URI cannot be created.
	 * @see #getDefaultProtocolCharset
	 */
	public URI(String scheme, String userinfo, String host, int port)
			throws URIException {

		// delegate to the seven-argument constructor with no path, query or
		// fragment
		this(scheme, userinfo, host, port, null, null, null);
	}

	/**
	 * Construct a general URI from the given components.
	 * 
	 * @param scheme
	 *            the scheme string
	 * @param userinfo
	 *            the userinfo string
	 * @param host
	 *            the host string
	 * @param port
	 *            the port number
	 * @param path
	 *            the path string
	 * @throws URIException
	 *             If the new URI cannot be created.
	 * @see #getDefaultProtocolCharset
	 */
	public URI(String scheme, String userinfo, String host, int port,
			String path) throws URIException {

		// delegate to the seven-argument constructor with no query or
		// fragment
		this(scheme, userinfo, host, port, path, null, null);
	}

	/**
	 * Construct a general URI from the given components.
	 * 
	 * @param scheme
	 *            the scheme string
	 * @param userinfo
	 *            the userinfo string
	 * @param host
	 *            the host string
	 * @param port
	 *            the port number
	 * @param path
	 *            the path string
	 * @param query
	 *            the query string
	 * @throws URIException
	 *             If the new URI cannot be created.
	 * @see #getDefaultProtocolCharset
	 */
	public URI(String scheme, String userinfo, String host, int port,
			String path, String query) throws URIException {

		// delegate to the seven-argument constructor with no fragment
		this(scheme, userinfo, host, port, path, query, null);
	}

	/**
	 * Construct a general URI from the given components.
	 * 
	 * @param scheme
	 *            the scheme string
	 * @param userinfo
	 *            the userinfo string
	 * @param host
	 *            the host string
	 * @param port
	 *            the port number
	 * @param path
	 *            the path string
	 * @param query
	 *            the query string
	 * @param fragment
	 *            the fragment string
	 * @throws URIException
	 *             If the new URI cannot be created.
	 * @see #getDefaultProtocolCharset
	 */
	public URI(String scheme, String userinfo, String host, int port,
			String path, String query, String fragment) throws URIException {

		// assemble the authority and hand over to the five-argument
		// (scheme, authority, path, query, fragment) constructor
		this(scheme, joinAuthority(userinfo, host, port), path, query,
				fragment);
	}

	/**
	 * Builds the authority string <code>[userinfo@]host[:port]</code>.
	 * 
	 * @param userinfo
	 *            the userinfo, or <code>null</code> to omit it
	 * @param host
	 *            the host, or <code>null</code> for no authority at all
	 * @param port
	 *            the port number, or <code>-1</code> to omit it
	 * @return the authority string, or <code>null</code> if
	 *         <code>host</code> is <code>null</code>
	 */
	private static String joinAuthority(String userinfo, String host, int port) {
		if (host == null) {
			return null;
		}
		StringBuffer authority = new StringBuffer();
		if (userinfo != null) {
			authority.append(userinfo).append('@');
		}
		authority.append(host);
		if (port != -1) {
			authority.append(':').append(port);
		}
		return authority.toString();
	}

	/**
	 * Construct a general URI from the given components.
	 * 
	 * @param scheme
	 *            the scheme string
	 * @param host
	 *            the host string
	 * @param path
	 *            the path string
	 * @param fragment
	 *            the fragment string
	 * @throws URIException
	 *             If the new URI cannot be created.
	 * @see #getDefaultProtocolCharset
	 */
	public URI(String scheme, String host, String path, String fragment)
			throws URIException {

		// delegate to the five-argument constructor: the host is passed in
		// the authority position and there is no query
		this(scheme, host, path, null, fragment);
	}

	/**
	 * Construct a general URI with the given relative URI string.
	 * 
	 * @param base
	 *            the base URI
	 * @param relative
	 *            the relative URI string
	 * @throws URIException
	 *             If the new URI cannot be created.
	 * 
	 * @deprecated Use #URI(URI, String, boolean)
	 */
	public URI(URI base, String relative) throws URIException {
		// the relative string is first parsed by the single-String
		// constructor (i.e. as an unescaped reference), then resolved
		// against the base
		this(base, new URI(relative));
	}

	/**
	 * Construct a general URI with the given relative URI string.
	 * 
	 * @param base
	 *            the base URI
	 * @param relative
	 *            the relative URI string
	 * @param escaped
	 *            <tt>true</tt> if URI character sequence is in escaped form.
	 *            <tt>false</tt> otherwise.
	 * 
	 * @throws URIException
	 *             If the new URI cannot be created.
	 * 
	 * @since 3.0
	 */
	public URI(URI base, String relative, boolean escaped) throws URIException {
		// the relative string is first parsed on its own, honouring the
		// escaped flag, then resolved against the base
		this(base, new URI(relative, escaped));
	}

	/**
	 * Construct a general URI with the given relative URI.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
	 *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * Resolving Relative References to Absolute Form.
	 * 
	 * <strong>Examples of Resolving Relative URI References</strong>
	 * 
	 * Within an object with a well-defined base URI of
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   http://a/b/c/d;p?q
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * the relative URI would be resolved as follows:
	 * 
	 * Normal Examples
	 * 
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   g:h           =  g:h
	 *   g             =  http://a/b/c/g
	 *   ./g           =  http://a/b/c/g
	 *   g/            =  http://a/b/c/g/
	 *   /g            =  http://a/g
	 *   //g           =  http://g
	 *   ?y            =  http://a/b/c/?y
	 *   g?y           =  http://a/b/c/g?y
	 *   #s            =  (current document)#s
	 *   g#s           =  http://a/b/c/g#s
	 *   g?y#s         =  http://a/b/c/g?y#s
	 *   ;x            =  http://a/b/c/;x
	 *   g;x           =  http://a/b/c/g;x
	 *   g;x?y#s       =  http://a/b/c/g;x?y#s
	 *   .             =  http://a/b/c/
	 *   ./            =  http://a/b/c/
	 *   ..            =  http://a/b/
	 *   ../           =  http://a/b/
	 *   ../g          =  http://a/b/g
	 *   ../..         =  http://a/
	 *   ../../        =  http://a/ 
	 *   ../../g       =  http://a/g
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * Some URI schemes do not allow a hierarchical syntax matching the
	 * &lt;hier_part&gt; syntax, and thus cannot use relative references.
	 * 
	 * @param base
	 *            the base URI
	 * @param relative
	 *            the relative URI
	 * @throws URIException
	 *             If the new URI cannot be created.
	 */
	public URI(URI base, URI relative) throws URIException {

		// a base without a scheme cannot be used to resolve a reference
		if (base._scheme == null) {
			throw new URIException(URIException.PARSING, "base URI required");
		}
		// NOTE: this condition is always true here because of the guard above
		if (base._scheme != null) {
			this._scheme = base._scheme;
			this._authority = base._authority;
			this._is_net_path = base._is_net_path;
		}
		// If either side is opaque, no hierarchical resolution is done: the
		// opaque part and the fragment are taken from the relative URI and
		// the constructor returns early (without the reparse at the end).
		if (base._is_opaque_part || relative._is_opaque_part) {
			this._scheme = base._scheme;
			this._is_opaque_part = base._is_opaque_part
					|| relative._is_opaque_part;
			this._opaque = relative._opaque;
			this._fragment = relative._fragment;
			this.setURI();
			return;
		}
		boolean schemesEqual = Arrays.equals(base._scheme, relative._scheme);
		// The relative URI has its own scheme and either that scheme differs
		// from the base or it also carries an authority: take scheme,
		// authority and path from the relative URI.
		if (relative._scheme != null
				&& (!schemesEqual || relative._authority != null)) {
			this._scheme = relative._scheme;
			this._is_net_path = relative._is_net_path;
			this._authority = relative._authority;
			if (relative._is_server) {
				this._is_server = relative._is_server;
				this._userinfo = relative._userinfo;
				this._host = relative._host;
				this._port = relative._port;
			} else if (relative._is_reg_name) {
				this._is_reg_name = relative._is_reg_name;
			}
			this._is_abs_path = relative._is_abs_path;
			this._is_rel_path = relative._is_rel_path;
			this._path = relative._path;
		} else if (base._authority != null && relative._scheme == null) {
			// scheme-less relative reference: inherit the base authority
			this._is_net_path = base._is_net_path;
			this._authority = base._authority;
			if (base._is_server) {
				this._is_server = base._is_server;
				this._userinfo = base._userinfo;
				this._host = base._host;
				this._port = base._port;
			} else if (base._is_reg_name) {
				this._is_reg_name = base._is_reg_name;
			}
		}
		// an authority on the relative URI always overrides what was set so
		// far, together with the relative path
		if (relative._authority != null) {
			this._is_net_path = relative._is_net_path;
			this._authority = relative._authority;
			if (relative._is_server) {
				this._is_server = relative._is_server;
				this._userinfo = relative._userinfo;
				this._host = relative._host;
				this._port = relative._port;
			} else if (relative._is_reg_name) {
				this._is_reg_name = relative._is_reg_name;
			}
			this._is_abs_path = relative._is_abs_path;
			this._is_rel_path = relative._is_rel_path;
			this._path = relative._path;
		}
		// resolve the path and query if necessary
		if (relative._authority == null
				&& (relative._scheme == null || schemesEqual)) {
			if ((relative._path == null || relative._path.length == 0)
					&& relative._query == null) {
				// handle a reference to the current document, see RFC 2396
				// section 5.2 step 2
				this._path = base._path;
				this._query = base._query;
			} else {
				this._path = resolvePath(base._path, relative._path);
			}
		}
		// base._query removed
		if (relative._query != null) {
			this._query = relative._query;
		}
		// base._fragment removed
		if (relative._fragment != null) {
			this._fragment = relative._fragment;
		}
		this.setURI();
		// reparse the newly built URI, this will ensure that all flags are set
		// correctly.
		// TODO there must be a better way to do this
		parseUriReference(new String(_uri), true);
	}

	// --------------------------------------------------- Instance Variables

	/** Version ID for serialization */
	static final long serialVersionUID = 604752400577948726L;

	/**
	 * Cache the hash code for this URI.
	 */
	protected int hash = 0;

	/**
	 * This Uniform Resource Identifier (URI). The URI is always in an "escaped"
	 * form, since escaping or unescaping a completed URI might change its
	 * semantics.
	 */
	protected char[] _uri = null;

	/**
	 * The charset of the protocol used by this URI instance.
	 */
	protected String protocolCharset = null;

	/**
	 * The default charset of the protocol. RFC 2277, 2396
	 */
	protected static String defaultProtocolCharset = "UTF-8";

	/**
	 * The default charset of the document. RFC 2277, 2396 The platform's
	 * charset is used for the document by default.
	 */
	protected static String defaultDocumentCharset = null;
	protected static String defaultDocumentCharsetByLocale = null;
	protected static String defaultDocumentCharsetByPlatform = null;
	// Resolve the default document charset: the locale-derived charset wins,
	// the platform encoding is the fallback.
	static {
		// locale-derived charset, kept for backward compatibility
		final Locale current = Locale.getDefault();
		if (current != null) {
			defaultDocumentCharsetByLocale = LocaleToCharsetMap
					.getCharset(current);
		}
		defaultDocumentCharset = defaultDocumentCharsetByLocale;

		// platform encoding; reading the property may be forbidden by a
		// security manager, in which case the value simply stays null
		try {
			defaultDocumentCharsetByPlatform = System
					.getProperty("file.encoding");
		} catch (SecurityException ignore) {
		}

		// fall back to the platform encoding when the locale gave nothing
		if (defaultDocumentCharset == null) {
			defaultDocumentCharset = defaultDocumentCharsetByPlatform;
		}
	}

	/**
	 * The scheme.
	 */
	protected char[] _scheme = null;

	/**
	 * The opaque.
	 */
	protected char[] _opaque = null;

	/**
	 * The authority.
	 */
	protected char[] _authority = null;

	/**
	 * The userinfo.
	 */
	protected char[] _userinfo = null;

	/**
	 * The host.
	 */
	protected char[] _host = null;

	/**
	 * The port.
	 */
	protected int _port = -1;

	/**
	 * The path.
	 */
	protected char[] _path = null;

	/**
	 * The query.
	 */
	protected char[] _query = null;

	/**
	 * The fragment.
	 */
	protected char[] _fragment = null;

	/**
	 * The root path.
	 */
	protected static final char[] rootPath = { '/' };

	// ---------------------- Generous characters for each component validation

	/**
	 * The percent "%" character always has the reserved purpose of being the
	 * escape indicator, it must be escaped as "%25" in order to be used as data
	 * within a URI.
	 */
	protected static final BitSet percent = new BitSet(256);
	// The set has exactly one member: the escape indicator itself.
	static {
		final char escapeIndicator = '%';
		percent.set(escapeIndicator);
	}

	/**
	 * BitSet for digit.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * digit = &quot;0&quot; | &quot;1&quot; | &quot;2&quot; | &quot;3&quot; | &quot;4&quot; | &quot;5&quot; | &quot;6&quot; | &quot;7&quot; | &quot;8&quot; | &quot;9&quot;
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet digit = new BitSet(256);
	// Populate with the ten decimal digits '0' through '9'.
	static {
		for (char c = '0'; c <= '9'; c++) {
			digit.set(c);
		}
	}

	/**
	 * BitSet for alpha.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * alpha = lowalpha | upalpha
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet alpha = new BitSet(256);
	// Populate with both cases of the 26 letters in a single pass.
	static {
		for (int offset = 0; offset < 26; offset++) {
			alpha.set('a' + offset);
			alpha.set('A' + offset);
		}
	}

	/**
	 * BitSet for alphanum (join of alpha &amp; digit).
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * alphanum = alpha | digit
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet alphanum = new BitSet(256);
	// Union of the digit and alpha sets.
	static {
		alphanum.or(digit);
		alphanum.or(alpha);
	}

	/**
	 * BitSet for hex.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * hex = digit | &quot;A&quot; | &quot;B&quot; | &quot;C&quot; | &quot;D&quot; | &quot;E&quot; | &quot;F&quot; | &quot;a&quot; | &quot;b&quot; | &quot;c&quot; | &quot;d&quot; | &quot;e&quot;
	 * 		| &quot;f&quot;
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet hex = new BitSet(256);
	// Hexadecimal letters in both cases, plus the decimal digits.
	static {
		final String letters = "abcdefABCDEF";
		for (int i = 0; i < letters.length(); i++) {
			hex.set(letters.charAt(i));
		}
		hex.or(digit);
	}

	/**
	 * BitSet for escaped.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * escaped       = "%" hex hex
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet escaped = new BitSet(256);
	// Every character that can occur in a "%" hex hex triplet.
	static {
		escaped.or(hex);
		escaped.or(percent);
	}

	/**
	 * BitSet for mark.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * mark = &quot;-&quot; | &quot;_&quot; | &quot;.&quot; | &quot;!&quot; | &quot;&tilde;&quot; | &quot;*&quot; | &quot;'&quot; | &quot;(&quot; | &quot;)&quot;
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet mark = new BitSet(256);
	// Populate from the literal list of mark characters.
	static {
		final String marks = "-_.!~*'()";
		for (int i = 0; i < marks.length(); i++) {
			mark.set(marks.charAt(i));
		}
	}

	/**
	 * Data characters that are allowed in a URI but do not have a reserved
	 * purpose are called unreserved.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * unreserved = alphanum | mark
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet unreserved = new BitSet(256);
	// Union of the mark and alphanum sets.
	static {
		unreserved.or(mark);
		unreserved.or(alphanum);
	}

	/**
	 * BitSet for reserved.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * reserved = &quot;;&quot; | &quot;/&quot; | &quot;?&quot; | &quot;:&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot; | &quot;$&quot; | &quot;,&quot;
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet reserved = new BitSet(256);
	// Populate from the literal list of reserved characters.
	static {
		final String reservedChars = ";/?:@&=+$,";
		for (int i = 0; i < reservedChars.length(); i++) {
			reserved.set(reservedChars.charAt(i));
		}
	}

	/**
	 * BitSet for uric.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * uric = reserved | unreserved | escaped
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet uric = new BitSet(256);
	// Union of the escaped, unreserved and reserved sets.
	static {
		uric.or(escaped);
		uric.or(unreserved);
		uric.or(reserved);
	}

	/**
	 * BitSet for fragment (alias for uric).
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * fragment      = *uric
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet fragment = uric;

	/**
	 * BitSet for query (alias for uric).
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * query         = *uric
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet query = uric;

	/**
	 * BitSet for pchar.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * pchar = unreserved | escaped | &quot;:&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot; | &quot;$&quot; | &quot;,&quot;
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet pchar = new BitSet(256);
	// The listed punctuation characters, plus the unreserved and escaped
	// sets.
	static {
		final String extras = ":@&=+$,";
		for (int i = 0; i < extras.length(); i++) {
			pchar.set(extras.charAt(i));
		}
		pchar.or(unreserved);
		pchar.or(escaped);
	}

	/**
	 * BitSet for param (alias for pchar).
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * param         = *pchar
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet param = pchar;

	/**
	 * BitSet for segment.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * segment       = *pchar *( ";" param )
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet segment = new BitSet(256);
	// The parameter separator ';' together with the pchar and param sets.
	static {
		segment.set(';');
		segment.or(param);
		segment.or(pchar);
	}

	/**
	 * BitSet for path segments.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * path_segments = segment *( "/" segment )
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet path_segments = new BitSet(256);
	// Segment characters plus the segment separator '/'.
	static {
		path_segments.or(segment);
		path_segments.set('/');
	}

	/**
	 * URI absolute path.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * abs_path      = "/"  path_segments
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet abs_path = new BitSet(256);
	// Path segment characters plus the leading '/'.
	static {
		abs_path.or(path_segments);
		abs_path.set('/');
	}

	/**
	 * URI bitset for encoding typical non-slash characters.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * uric_no_slash = unreserved | escaped | &quot;;&quot; | &quot;?&quot; | &quot;:&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot;
	 * 		| &quot;$&quot; | &quot;,&quot;
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet uric_no_slash = new BitSet(256);
	// Static initializer for uric_no_slash: the unreserved and escaped sets
	// plus every reserved character except the slash.
	static {
		uric_no_slash.or(unreserved);
		uric_no_slash.or(escaped);
		uric_no_slash.set(';');
		uric_no_slash.set('?');
		// ':' is listed in the uric_no_slash production of RFC 2396
		uric_no_slash.set(':');
		uric_no_slash.set('@');
		uric_no_slash.set('&');
		uric_no_slash.set('=');
		uric_no_slash.set('+');
		uric_no_slash.set('$');
		uric_no_slash.set(',');
	}

	/**
	 * URI bitset that combines uric_no_slash and uric.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * opaque_part = uric_no_slash * uric
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet opaque_part = new BitSet(256);
	// Deliberately generous: the grammar forbids a slash only as the first
	// character, which a per-character set cannot express, so the set is
	// simply the union of uric and uric_no_slash.
	static {
		opaque_part.or(uric);
		opaque_part.or(uric_no_slash);
	}

	/**
	 * URI bitset that combines absolute path and opaque part.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * path          = [ abs_path | opaque_part ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet path = new BitSet(256);
	// Union of the opaque_part and abs_path sets.
	static {
		path.or(opaque_part);
		path.or(abs_path);
	}

	/**
	 * Port, a logical alias for digit.
	 */
	protected static final BitSet port = digit;

	/**
	 * Bitset that combines digit and dot for IPv4address.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet IPv4address = new BitSet(256);
	// The dot separator plus the decimal digits.
	static {
		IPv4address.set('.');
		IPv4address.or(digit);
	}

	/**
	 * RFC 2373.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * IPv6address = hexpart [ ":" IPv4address ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet IPv6address = new BitSet(256);
	// IPv6address characters: hex digits (hexpart), ':' and the IPv4address set
	static {
		final BitSet[] parts = { hex, IPv4address };
		for (int i = 0; i < parts.length; i++) {
			IPv6address.or(parts[i]);
		}
		IPv6address.set(':');
	}

	/**
	 * RFC 2732, 2373.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * IPv6reference   = "[" IPv6address "]"
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet IPv6reference = new BitSet(256);
	// IPv6reference characters: the IPv6address set plus the enclosing brackets
	static {
		IPv6reference.or(IPv6address);
		final String brackets = "[]";
		for (int i = 0; i < brackets.length(); i++) {
			IPv6reference.set(brackets.charAt(i));
		}
	}

	/**
	 * BitSet for toplabel.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet toplabel = new BitSet(256);
	// toplabel characters: the hyphen plus all of alphanum
	static {
		toplabel.set('-');
		toplabel.or(alphanum);
	}

	/**
	 * BitSet for domainlabel.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * This is the same <code>BitSet</code> instance as <code>toplabel</code>,
	 * not a copy, so a modification made through one name is visible through
	 * the other.
	 */
	protected static final BitSet domainlabel = toplabel;

	/**
	 * BitSet for hostname.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * hostname      = *( domainlabel "." ) toplabel [ "." ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet hostname = new BitSet(256);
	// hostname characters: '.' plus toplabel; domainlabel is the very same set
	// as toplabel, so it needs no separate union
	static {
		hostname.set('.');
		hostname.or(toplabel);
	}

	/**
	 * BitSet for host.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * host = hostname | IPv4address | IPv6reference
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet host = new BitSet(256);
	// host characters: hostname and IPv6reference; the latter already holds
	// every IPv4address character, so IPv4address is not unioned separately
	static {
		final BitSet[] alternatives = { hostname, IPv6reference };
		for (int i = 0; i < alternatives.length; i++) {
			host.or(alternatives[i]);
		}
	}

	/**
	 * BitSet for hostport.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * hostport      = host [ ":" port ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet hostport = new BitSet(256);
	// hostport characters: host, the ':' separator and port
	static {
		hostport.set(':');
		final BitSet[] parts = { host, port };
		for (int i = 0; i < parts.length; i++) {
			hostport.or(parts[i]);
		}
	}

	/**
	 * Bitset for userinfo.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * userinfo      = *( unreserved | escaped |
	 *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet userinfo = new BitSet(256);
	// userinfo characters: unreserved, escaped and the listed punctuation
	static {
		userinfo.or(unreserved);
		userinfo.or(escaped);
		final String punctuation = ";:&=+$,";
		for (int i = 0; i < punctuation.length(); i++) {
			userinfo.set(punctuation.charAt(i));
		}
	}

	/**
	 * BitSet for within the userinfo component like user and password.
	 */
	public static final BitSet within_userinfo = new BitSet(256);
	// Start from userinfo, then remove the characters that are reserved within
	// the authority component
	static {
		within_userinfo.or(userinfo);
		final String reservedInAuthority = ";:@?/";
		for (int i = 0; i < reservedInAuthority.length(); i++) {
			within_userinfo.clear(reservedInAuthority.charAt(i));
		}
	}

	/**
	 * Bitset for server.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * server        = [ [ userinfo "@" ] hostport ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet server = new BitSet(256);
	// server characters: userinfo, the '@' separator and hostport
	static {
		server.set('@');
		final BitSet[] parts = { userinfo, hostport };
		for (int i = 0; i < parts.length; i++) {
			server.or(parts[i]);
		}
	}

	/**
	 * BitSet for reg_name.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * reg_name = 1 * (unreserved | escaped | &quot;$&quot; | &quot;,&quot; | &quot;;&quot; | &quot;:&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot;)
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet reg_name = new BitSet(256);
	// reg_name characters: unreserved, escaped and the listed punctuation
	static {
		reg_name.or(unreserved);
		reg_name.or(escaped);
		final String punctuation = "$,;:@&=+";
		for (int i = 0; i < punctuation.length(); i++) {
			reg_name.set(punctuation.charAt(i));
		}
	}

	/**
	 * BitSet for authority.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * authority = server | reg_name
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet authority = new BitSet(256);
	// authority accepts every character of either alternative: server, reg_name
	static {
		final BitSet[] alternatives = { server, reg_name };
		for (int i = 0; i < alternatives.length; i++) {
			authority.or(alternatives[i]);
		}
	}

	/**
	 * BitSet for scheme.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * scheme = alpha * (alpha | digit | &quot;+&quot; | &quot;-&quot; | &quot;.&quot;)
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet scheme = new BitSet(256);
	// scheme characters: alpha, digit and "+", "-", "."
	static {
		scheme.or(alpha);
		scheme.or(digit);
		final String punctuation = "+-.";
		for (int i = 0; i < punctuation.length(); i++) {
			scheme.set(punctuation.charAt(i));
		}
	}

	/**
	 * BitSet for rel_segment.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * rel_segment = 1 * (unreserved | escaped | &quot;;&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot; | &quot;$&quot; | &quot;,&quot;)
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet rel_segment = new BitSet(256);
	// rel_segment characters: unreserved, escaped and the listed punctuation
	static {
		rel_segment.or(unreserved);
		rel_segment.or(escaped);
		final String punctuation = ";@&=+$,";
		for (int i = 0; i < punctuation.length(); i++) {
			rel_segment.set(punctuation.charAt(i));
		}
	}

	/**
	 * BitSet for rel_path.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * rel_path = rel_segment[abs_path]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet rel_path = new BitSet(256);
	// rel_path characters: those of rel_segment and of the optional abs_path
	static {
		final BitSet[] parts = { rel_segment, abs_path };
		for (int i = 0; i < parts.length; i++) {
			rel_path.or(parts[i]);
		}
	}

	/**
	 * BitSet for net_path.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * net_path      = "//" authority [ abs_path ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet net_path = new BitSet(256);
	// net_path characters: the leading slashes, authority and abs_path
	static {
		final BitSet[] parts = { authority, abs_path };
		for (int i = 0; i < parts.length; i++) {
			net_path.or(parts[i]);
		}
		net_path.set('/');
	}

	/**
	 * BitSet for hier_part.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * hier_part     = ( net_path | abs_path ) [ "?" query ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet hier_part = new BitSet(256);
	// hier_part characters: net_path, abs_path and query. The '?' separator is
	// not set explicitly; it was noted as already included by these unions.
	static {
		final BitSet[] parts = { net_path, abs_path, query };
		for (int i = 0; i < parts.length; i++) {
			hier_part.or(parts[i]);
		}
	}

	/**
	 * BitSet for relativeURI.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet relativeURI = new BitSet(256);
	// relativeURI characters: net_path, abs_path, rel_path and query. The '?'
	// separator is not set explicitly; it was noted as already included.
	static {
		final BitSet[] parts = { net_path, abs_path, rel_path, query };
		for (int i = 0; i < parts.length; i++) {
			relativeURI.or(parts[i]);
		}
	}

	/**
	 * BitSet for absoluteURI.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * absoluteURI   = scheme ":" ( hier_part | opaque_part )
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet absoluteURI = new BitSet(256);
	// absoluteURI characters: scheme, the ':' separator, hier_part, opaque_part
	static {
		absoluteURI.set(':');
		final BitSet[] parts = { scheme, hier_part, opaque_part };
		for (int i = 0; i < parts.length; i++) {
			absoluteURI.or(parts[i]);
		}
	}

	/**
	 * BitSet for URI-reference.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 */
	protected static final BitSet URI_reference = new BitSet(256);
	// URI_reference characters: absoluteURI, relativeURI, the '#' separator and
	// fragment
	static {
		URI_reference.set('#');
		final BitSet[] parts = { absoluteURI, relativeURI, fragment };
		for (int i = 0; i < parts.length; i++) {
			URI_reference.or(parts[i]);
		}
	}

	// ---------------------------- Characters disallowed within the URI syntax
	// Excluded US-ASCII Characters are like control, space, delims and unwise

	/**
	 * BitSet for control.
	 */
	public static final BitSet control = new BitSet(256);
	// control characters: 0x00 through 0x1F, and 0x7F
	static {
		int code = 0x00;
		while (code < 0x20) {
			control.set(code);
			code++;
		}
		control.set(0x7F);
	}

	/**
	 * BitSet for space.
	 */
	public static final BitSet space = new BitSet(256);
	// the single US-ASCII space character (0x20)
	static {
		space.set(' ');
	}

	/**
	 * BitSet for delims.
	 */
	public static final BitSet delims = new BitSet(256);
	// delimiter characters: < > # % "
	static {
		final String delimiterChars = "<>#%\"";
		for (int i = 0; i < delimiterChars.length(); i++) {
			delims.set(delimiterChars.charAt(i));
		}
	}

	/**
	 * BitSet for unwise.
	 */
	public static final BitSet unwise = new BitSet(256);
	// unwise characters: { } | \ ^ [ ] `
	static {
		final String unwiseChars = "{}|\\^[]`";
		for (int i = 0; i < unwiseChars.length(); i++) {
			unwise.set(unwiseChars.charAt(i));
		}
	}

	/**
	 * Disallowed rel_path before escaping.
	 */
	public static final BitSet disallowed_rel_path = new BitSet(256);
	// Every uric character that rel_path does not accept
	static {
		for (int ch = 0; ch < uric.length(); ch++) {
			if (uric.get(ch) && !rel_path.get(ch)) {
				disallowed_rel_path.set(ch);
			}
		}
	}

	/**
	 * Disallowed opaque_part before escaping.
	 */
	public static final BitSet disallowed_opaque_part = new BitSet(256);
	// Every uric character that opaque_part does not accept
	static {
		for (int ch = 0; ch < uric.length(); ch++) {
			if (uric.get(ch) && !opaque_part.get(ch)) {
				disallowed_opaque_part.set(ch);
			}
		}
	}

	// ----------------------- Characters allowed within and for each component

	/**
	 * Those characters that are allowed for the authority component.
	 */
	public static final BitSet allowed_authority = new BitSet(256);
	// The authority characters without the escape indicator '%'
	static {
		final char escapeIndicator = '%';
		allowed_authority.or(authority);
		allowed_authority.clear(escapeIndicator);
	}

	/**
	 * Those characters that are allowed for the opaque_part.
	 */
	public static final BitSet allowed_opaque_part = new BitSet(256);
	// The opaque_part characters without the escape indicator '%'
	static {
		final char escapeIndicator = '%';
		allowed_opaque_part.or(opaque_part);
		allowed_opaque_part.clear(escapeIndicator);
	}

	/**
	 * Those characters that are allowed for the reg_name.
	 */
	public static final BitSet allowed_reg_name = new BitSet(256);
	// The reg_name characters without the escape indicator '%'
	static {
		final char escapeIndicator = '%';
		allowed_reg_name.or(reg_name);
		allowed_reg_name.clear(escapeIndicator);
	}

	/**
	 * Those characters that are allowed for the userinfo component.
	 */
	public static final BitSet allowed_userinfo = new BitSet(256);
	// The userinfo characters without the escape indicator '%'
	static {
		final char escapeIndicator = '%';
		allowed_userinfo.or(userinfo);
		allowed_userinfo.clear(escapeIndicator);
	}

	/**
	 * Those characters that are allowed for within the userinfo component.
	 */
	public static final BitSet allowed_within_userinfo = new BitSet(256);
	// The within_userinfo characters without the escape indicator '%'
	static {
		final char escapeIndicator = '%';
		allowed_within_userinfo.or(within_userinfo);
		allowed_within_userinfo.clear(escapeIndicator);
	}

	/**
	 * Those characters that are allowed for the IPv6reference component. The
	 * characters '[', ']' in IPv6reference should be excluded.
	 */
	public static final BitSet allowed_IPv6reference = new BitSet(256);
	// The IPv6reference characters with the enclosing brackets removed
	static {
		allowed_IPv6reference.or(IPv6reference);
		final String brackets = "[]";
		for (int i = 0; i < brackets.length(); i++) {
			allowed_IPv6reference.clear(brackets.charAt(i));
		}
	}

	/**
	 * Those characters that are allowed for the host component. The characters
	 * '[', ']' in IPv6reference should be excluded.
	 */
	public static final BitSet allowed_host = new BitSet(256);
	// Union of hostname and the bracket-free allowed_IPv6reference
	static {
		final BitSet[] parts = { hostname, allowed_IPv6reference };
		for (int i = 0; i < parts.length; i++) {
			allowed_host.or(parts[i]);
		}
	}

	/**
	 * Those characters that are allowed for the authority component.
	 */
	public static final BitSet allowed_within_authority = new BitSet(256);
	// Union of server and reg_name, minus the separators ; : @ ? /
	static {
		final BitSet[] parts = { server, reg_name };
		for (int i = 0; i < parts.length; i++) {
			allowed_within_authority.or(parts[i]);
		}
		final String separators = ";:@?/";
		for (int i = 0; i < separators.length(); i++) {
			allowed_within_authority.clear(separators.charAt(i));
		}
	}

	/**
	 * Those characters that are allowed for the abs_path.
	 */
	public static final BitSet allowed_abs_path = new BitSet(256);
	// The abs_path characters ('/' needs no explicit set) with the percent set
	// and '+' removed
	static {
		final char plusSign = '+';
		allowed_abs_path.or(abs_path);
		allowed_abs_path.andNot(percent);
		allowed_abs_path.clear(plusSign);
	}

	/**
	 * Those characters that are allowed for the rel_path.
	 */
	public static final BitSet allowed_rel_path = new BitSet(256);
	// The rel_path characters with '%' and '+' removed
	static {
		allowed_rel_path.or(rel_path);
		final String removed = "%+";
		for (int i = 0; i < removed.length(); i++) {
			allowed_rel_path.clear(removed.charAt(i));
		}
	}

	/**
	 * Those characters that are allowed within the path.
	 */
	public static final BitSet allowed_within_path = new BitSet(256);
	// The abs_path characters with the separators / ; = ? removed
	static {
		allowed_within_path.or(abs_path);
		final String separators = "/;=?";
		for (int i = 0; i < separators.length(); i++) {
			allowed_within_path.clear(separators.charAt(i));
		}
	}

	/**
	 * Those characters that are allowed for the query component.
	 */
	public static final BitSet allowed_query = new BitSet(256);
	// The uric characters without the escape indicator '%'
	static {
		final char escapeIndicator = '%';
		allowed_query.or(uric);
		allowed_query.clear(escapeIndicator);
	}

	/**
	 * Those characters that are allowed within the query component.
	 */
	public static final BitSet allowed_within_query = new BitSet(256);
	// Every allowed_query character that is not in the reserved set
	static {
		for (int ch = 0; ch < allowed_query.length(); ch++) {
			if (allowed_query.get(ch) && !reserved.get(ch)) {
				allowed_within_query.set(ch);
			}
		}
	}

	/**
	 * Those characters that are allowed for the fragment component.
	 */
	public static final BitSet allowed_fragment = new BitSet(256);
	// The uric characters without the escape indicator '%'
	static {
		final char escapeIndicator = '%';
		allowed_fragment.or(uric);
		allowed_fragment.clear(escapeIndicator);
	}

	// ------------------------------------------- Flags for this URI-reference

	// TODO: Figure out what all these variables are for and provide javadoc

	// URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
	// absoluteURI = scheme ":" ( hier_part | opaque_part )
	// Set by parseUriReference when the character following the optional
	// scheme is '/'.
	protected boolean _is_hier_part;
	// Set by parseUriReference when the path is not absolute, does not pass
	// the rel_path check, and passes the opaque_part check.
	protected boolean _is_opaque_part;
	// relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
	// hier_part = ( net_path | abs_path ) [ "?" query ]
	// Set by parseUriReference after an authority introduced by "//" has been
	// parsed.
	protected boolean _is_net_path;
	// Set by parseUriReference inside the '/' branch when the parse start
	// position and the current position coincide.
	protected boolean _is_abs_path;
	// Set by parseUriReference when the path is not absolute and passes the
	// rel_path check.
	protected boolean _is_rel_path;
	// net_path = "//" authority [ abs_path ]
	// authority = server | reg_name
	// Set by parseAuthority when the host part consists of neither
	// IPv4address nor hostname characters.
	protected boolean _is_reg_name;
	protected boolean _is_server; // = _has_server
	// server = [ [ userinfo "@" ] hostport ]
	// host = hostname | IPv4address | IPv6reference
	// Exactly one of the following is set by parseAuthority according to the
	// form of the host it recognised; all are cleared for a reg_name.
	protected boolean _is_hostname;
	protected boolean _is_IPv4address;
	protected boolean _is_IPv6reference;

	// ------------------------------------------ Character and escape encoding

	/**
	 * Encodes URI string.
	 * 
	 * This is a two mapping, one from original characters to octets, and
	 * subsequently a second from octets to URI characters:
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   original character sequence->octet sequence->URI character sequence
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * An escaped octet is encoded as a character triplet, consisting of the
	 * percent character "%" followed by the two hexadecimal digits representing
	 * the octet code. For example, "%20" is the escaped encoding for the
	 * US-ASCII space character.
	 * <p>
	 * Conversion from the local filesystem character set to UTF-8 will normally
	 * involve a two step process. First convert the local character set to the
	 * UCS; then convert the UCS to UTF-8. The first step in the process can be
	 * performed by maintaining a mapping table that includes the local
	 * character set code and the corresponding UCS code. The next step is to
	 * convert the UCS character code to the UTF-8 encoding.
	 * <p>
	 * Mapping between vendor codepages can be done in a very similar manner as
	 * described above.
	 * <p>
	 * The only time escape encodings can allowedly be made is when a URI is
	 * being created from its component parts. The escape and validate methods
	 * are internally performed within this method.
	 * 
	 * @param original
	 *            the original character sequence
	 * @param allowed
	 *            those characters that are allowed within a component
	 * @param charset
	 *            the protocol charset
	 * @return URI character sequence
	 * @throws URIException
	 *             null component or unsupported character encoding
	 */

	protected static char[] encode(String original, BitSet allowed,
			String charset) throws URIException {
		// Both arguments are mandatory; the string is checked first.
		if (original == null) {
			throw new IllegalArgumentException(
					"Original string may not be null");
		}
		if (allowed == null) {
			throw new IllegalArgumentException("Allowed bitset may not be null");
		}
		// original characters -> octets in the requested charset
		final byte[] octets = EncodingUtil.getBytes(original, charset);
		// octets -> URI octets, as produced by URLCodec for the allowed set
		final byte[] uriOctets = URLCodec.encodeUrl(allowed, octets);
		// URI octets -> URI characters
		final String uriChars = EncodingUtil.getAsciiString(uriOctets);
		return uriChars.toCharArray();
	}

	/**
	 * Decodes URI encoded string.
	 * 
	 * This is a two mapping, one from URI characters to octets, and
	 * subsequently a second from octets to original characters:
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   URI character sequence->octet sequence->original character sequence
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * A URI must be separated into its components before the escaped characters
	 * within those components can be allowedly decoded.
	 * <p>
	 * Notice that there is a chance that URI characters that are non UTF-8 may
	 * be parsed as valid UTF-8. A recent non-scientific analysis found that EUC
	 * encoded Japanese words had a 2.7% false reading; SJIS had a 0.0005% false
	 * reading; other encoding such as ASCII or KOI-8 have a 0% false reading.
	 * <p>
	 * The percent "%" character always has the reserved purpose of being the
	 * escape indicator, it must be escaped as "%25" in order to be used as data
	 * within a URI.
	 * <p>
	 * The unescape method is internally performed within this method.
	 * 
	 * @param component
	 *            the URI character sequence
	 * @param charset
	 *            the protocol charset
	 * @return original character sequence
	 * @throws URIException
	 *             incomplete trailing escape pattern or unsupported character
	 *             encoding
	 */
	protected static String decode(char[] component, String charset)
			throws URIException {
		if (component != null) {
			// Hand over to the String overload, which does the actual work.
			final String uriChars = new String(component);
			return decode(uriChars, charset);
		}
		throw new IllegalArgumentException(
				"Component array of chars may not be null");
	}

	/**
	 * Decodes URI encoded string.
	 * 
	 * This is a two mapping, one from URI characters to octets, and
	 * subsequently a second from octets to original characters:
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   URI character sequence->octet sequence->original character sequence
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * A URI must be separated into its components before the escaped characters
	 * within those components can be allowedly decoded.
	 * <p>
	 * Notice that there is a chance that URI characters that are non UTF-8 may
	 * be parsed as valid UTF-8. A recent non-scientific analysis found that EUC
	 * encoded Japanese words had a 2.7% false reading; SJIS had a 0.0005% false
	 * reading; other encoding such as ASCII or KOI-8 have a 0% false reading.
	 * <p>
	 * The percent "%" character always has the reserved purpose of being the
	 * escape indicator, it must be escaped as "%25" in order to be used as data
	 * within a URI.
	 * <p>
	 * The unescape method is internally performed within this method.
	 * 
	 * @param component
	 *            the URI character sequence
	 * @param charset
	 *            the protocol charset
	 * @return original character sequence
	 * @throws URIException
	 *             incomplete trailing escape pattern or unsupported character
	 *             encoding
	 * 
	 * @since 3.0
	 */
	protected static String decode(String component, String charset)
			throws URIException {
		if (component == null) {
			throw new IllegalArgumentException(
					"Component array of chars may not be null");
		}
		// URI characters -> octets, with escape triplets resolved by URLCodec
		final byte[] octets;
		try {
			final byte[] uriOctets = EncodingUtil.getAsciiBytes(component);
			octets = URLCodec.decodeUrl(uriOctets);
		} catch (DecoderException malformed) {
			// surface codec failures as the URI-specific exception type
			throw new URIException(malformed.getMessage());
		}
		// octets -> original characters in the requested charset
		return EncodingUtil.getString(octets, charset);
	}

	/**
	 * Pre-validate the unescaped URI string within a specific component.
	 * 
	 * @param component
	 *            the component string within the component
	 * @param disallowed
	 *            those characters disallowed within the component
	 * @return if true, it doesn't have the disallowed characters if false, the
	 *         component is undefined or an incorrect one
	 */
	protected boolean prevalidate(String component, BitSet disallowed) {
		// An undefined component never passes.
		if (component == null) {
			return false;
		}
		// Reject as soon as one disallowed character is found.
		final int count = component.length();
		int pos = 0;
		while (pos < count) {
			if (disallowed.get(component.charAt(pos))) {
				return false;
			}
			pos++;
		}
		return true;
	}

	/**
	 * Validate the URI characters within a specific component. The component
	 * must be performed after escape encoding. Or it doesn't include escaped
	 * characters.
	 * 
	 * @param component
	 *            the characters sequence within the component
	 * @param generous
	 *            those characters that are allowed within a component
	 * @return if true, it's the correct URI character sequence
	 */
	protected boolean validate(char[] component, BitSet generous) {
		// An end offset of -1 makes the four-argument overload check the
		// array up to its last element.
		final int toLastElement = -1;
		return validate(component, 0, toLastElement, generous);
	}

	/**
	 * Validate the URI characters within a specific component. The component
	 * must be performed after escape encoding. Or it doesn't include escaped
	 * characters.
	 * <p>
	 * It's not that much strict, generous. The strict validation might be
	 * performed before being called this method.
	 * 
	 * @param component
	 *            the characters sequence within the component
	 * @param soffset
	 *            the starting offset of the given component
	 * @param eoffset
	 *            the ending offset of the given component if -1, it means the
	 *            length of the component
	 * @param generous
	 *            those characters that are allowed within a component
	 * @return if true, it's the correct URI character sequence
	 */
	protected boolean validate(char[] component, int soffset, int eoffset,
			BitSet generous) {
		// -1 stands for "up to the last element"; the end offset is inclusive.
		final int last = (eoffset == -1) ? component.length - 1 : eoffset;
		int pos = soffset;
		while (pos <= last) {
			// one character outside the generous set fails the whole range
			if (!generous.get(component[pos])) {
				return false;
			}
			pos++;
		}
		return true;
	}

	/**
	 * In order to avoid any possibility of conflict with non-ASCII characters,
	 * parse a URI reference as a <code>String</code> with the character
	 * encoding of the local system or the document.
	 * <p>
	 * The following line is the regular expression for breaking-down a URI
	 * reference into its components.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
	 *    12            3  4          5       6  7        8 9
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * For example, matching the above expression to
	 * http://jakarta.apache.org/ietf/uri/#Related results in the following
	 * subexpression matches:
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *               $1 = http:
	 *  scheme    =  $2 = http
	 *               $3 = //jakarta.apache.org
	 *  authority =  $4 = jakarta.apache.org
	 *  path      =  $5 = /ietf/uri/
	 *               $6 = <undefined>
	 *  query     =  $7 = <undefined>
	 *               $8 = #Related
	 *  fragment  =  $9 = Related
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @param original
	 *            the original character sequence
	 * @param escaped
	 *            <code>true</code> if <code>original</code> is escaped
	 * @throws URIException
	 *             If an error occurs.
	 */
	protected void parseUriReference(String original, boolean escaped)
			throws URIException {

		// validate and construct the URI character sequence
		if (original == null) {
			throw new URIException("URI-Reference required");
		}

		/*
		 * The parsing below follows the generic pattern
		 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
		 */
		String tmp = original.trim();

		/*
		 * The length of the string sequence of characters. It may not be equal
		 * to the length of the byte array.
		 */
		int length = tmp.length();

		/*
		 * Remove the delimiters like angle brackets around an URI: when both
		 * the first and the last character are in delims, drop both.
		 */
		if (length > 0) {
			char[] firstDelimiter = { tmp.charAt(0) };
			if (validate(firstDelimiter, delims)) {
				if (length >= 2) {
					char[] lastDelimiter = { tmp.charAt(length - 1) };
					if (validate(lastDelimiter, delims)) {
						tmp = tmp.substring(1, length - 1);
						length = length - 2;
					}
				}
			}
		}

		/*
		 * The starting index
		 */
		int from = 0;

		/*
		 * The test flag whether the URI is started from the path component:
		 * either there is no usable ':' (and no leading "//"), or a '/' comes
		 * before the first ':'.
		 */
		boolean isStartedFromPath = false;
		int atColon = tmp.indexOf(':');
		int atSlash = tmp.indexOf('/');
		if ((atColon <= 0 && !tmp.startsWith("//"))
				|| (atSlash >= 0 && atSlash < atColon)) {
			isStartedFromPath = true;
		}

		/*
		 * Locate the first delimiter; ':' only counts as one when the URI does
		 * not start from the path.
		 */
		int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
		if (at == -1) {
			at = 0;
		}

		/*
		 * Parse the scheme, e.g. scheme = $2 = http
		 */
		if (at > 0 && at < length && tmp.charAt(at) == ':') {
			// Lower-case with a fixed locale. The default-locale variant maps
			// 'I' to a dotless 'i' under e.g. a Turkish default locale, and
			// that character would then fail the scheme validation below.
			char[] target = tmp.substring(0, at).toLowerCase(Locale.ENGLISH)
					.toCharArray();
			if (validate(target, scheme)) {
				_scheme = target;
			} else {
				throw new URIException("incorrect scheme");
			}
			from = ++at;
		}

		/*
		 * Parse the authority component, e.g. authority = $4 =
		 * jakarta.apache.org
		 */
		// Reset flags
		_is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
		if (0 <= at && at < length && tmp.charAt(at) == '/') {
			// Set flag
			_is_hier_part = true;
			if (at + 2 < length && tmp.charAt(at + 1) == '/'
					&& !isStartedFromPath) {
				// the temporary index to start the search from
				int next = indexFirstOf(tmp, "/?#", at + 2);
				if (next == -1) {
					next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp
							.length();
				}
				parseAuthority(tmp.substring(at + 2, next), escaped);
				from = at = next;
				// Set flag
				_is_net_path = true;
			}
			if (from == at) {
				// Set flag
				_is_abs_path = true;
			}
		}

		/*
		 * Parse the path component, e.g. path = $5 = /ietf/uri/
		 */
		if (from < length) {
			// rel_path = rel_segment [ abs_path ]
			int next = indexFirstOf(tmp, "?#", from);
			if (next == -1) {
				next = tmp.length();
			}
			if (!_is_abs_path) {
				// unescaped input is pre-validated against the disallowed
				// set, escaped input is validated against the grammar set
				if (!escaped
						&& prevalidate(tmp.substring(from, next),
								disallowed_rel_path)
						|| escaped
						&& validate(tmp.substring(from, next).toCharArray(),
								rel_path)) {
					// Set flag
					_is_rel_path = true;
				} else if (!escaped
						&& prevalidate(tmp.substring(from, next),
								disallowed_opaque_part)
						|| escaped
						&& validate(tmp.substring(from, next).toCharArray(),
								opaque_part)) {
					// Set flag
					_is_opaque_part = true;
				} else {
					// the path component may be empty
					_path = null;
				}
			}
			String s = tmp.substring(from, next);
			if (escaped) {
				setRawPath(s.toCharArray());
			} else {
				setPath(s);
			}
			at = next;
		}

		// set the charset to do escape encoding
		String charset = getProtocolCharset();

		/*
		 * Parse the query component, e.g. query = $7 = <undefined>. A '?' that
		 * is the last character is not treated as a query.
		 */
		if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
			int next = tmp.indexOf('#', at + 1);
			if (next == -1) {
				next = tmp.length();
			}
			if (escaped) {
				_query = tmp.substring(at + 1, next).toCharArray();
				if (!validate(_query, uric)) {
					throw new URIException("Invalid query");
				}
			} else {
				_query = encode(tmp.substring(at + 1, next), allowed_query,
						charset);
			}
			at = next;
		}

		/*
		 * Parse the fragment component, e.g. fragment = $9 = Related
		 */
		if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
			if (at + 1 == length) { // empty fragment
				_fragment = "".toCharArray();
			} else {
				_fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
						: encode(tmp.substring(at + 1), allowed_fragment,
								charset);
			}
		}

		// set this URI.
		setURI();
	}

	/**
	 * Find the earliest position in <code>s</code>, searching from the
	 * beginning, at which any one of the given delimiter characters occurs.
	 * 
	 * @param s
	 *            the string to be indexed
	 * @param delims
	 *            the delimiters used to index
	 * @return the smallest index of any delimiter, or -1 if none is found
	 */
	protected int indexFirstOf(String s, String delims) {
		// the three-argument overload treats a negative offset as 0
		final int fromStart = -1;
		return indexFirstOf(s, delims, fromStart);
	}

	/**
	 * Get the earliest index, at or after the given offset, at which any one
	 * of the given delimiter characters occurs in the string.
	 * 
	 * @param s
	 *            the string to be indexed
	 * @param delims
	 *            the delimiters used to index
	 * @param offset
	 *            the from index
	 * @return the earlier index if there are delimiters
	 */
	protected int indexFirstOf(String s, String delims, int offset) {
		// nothing to search in, or nothing to search for
		final boolean emptySubject = (s == null || s.length() == 0);
		final boolean emptyDelimiters = (delims == null || delims.length() == 0);
		if (emptySubject || emptyDelimiters) {
			return -1;
		}
		// an offset past the end cannot match; a negative one means "from 0"
		if (offset > s.length()) {
			return -1;
		}
		final int start = (offset < 0) ? 0 : offset;
		// remember the smallest hit over all delimiter characters
		int earliest = -1;
		for (int i = 0; i < delims.length(); i++) {
			final int hit = s.indexOf(delims.charAt(i), start);
			if (hit >= 0 && (earliest == -1 || hit < earliest)) {
				earliest = hit;
			}
		}
		return earliest;
	}

	/**
	 * Find the first position in the character array, searching from the
	 * beginning, at which the given delimiter occurs.
	 * 
	 * @param s
	 *            the character array to be indexed
	 * @param delim
	 *            the delimiter used to index
	 * @return the index of the first occurrence of the delimiter, or -1 if it
	 *         does not occur
	 */
	protected int indexFirstOf(char[] s, char delim) {
		final int fromStart = 0;
		return indexFirstOf(s, delim, fromStart);
	}

	/**
	 * Get the earliest index, at or after the given offset, at which the given
	 * delimiter occurs in the character array.
	 * 
	 * @param s
	 *            the character array to be indexed
	 * @param delim
	 *            the delimiter used to index
	 * @param offset
	 *            The offset.
	 * @return the ealier index if there is a delimiter
	 */
	protected int indexFirstOf(char[] s, char delim, int offset) {
		// nothing to search in
		if (s == null || s.length == 0) {
			return -1;
		}
		// an offset past the end cannot match; a negative one means "from 0"
		if (offset > s.length) {
			return -1;
		}
		int pos = (offset < 0) ? 0 : offset;
		while (pos < s.length) {
			if (s[pos] == delim) {
				return pos;
			}
			pos++;
		}
		return -1;
	}

	/**
	 * Parse the authority component.
	 * 
	 * @param original
	 *            the original character sequence of authority component
	 * @param escaped
	 *            <code>true</code> if <code>original</code> is escaped
	 * @throws URIException
	 *             If an error occurs.
	 */
	protected void parseAuthority(String original, boolean escaped)
			throws URIException {

		// Reset flags
		_is_reg_name = _is_server = _is_hostname = _is_IPv4address = _is_IPv6reference = false;

		// Forget the optional components left over from an earlier parse.
		// Both are assigned below only when they occur in 'original', so
		// without this reset a re-parse (e.g. through setEscapedAuthority)
		// would carry a stale userinfo or port into the new authority.
		_userinfo = null;
		_port = -1;

		// set the charset to do escape encoding
		String charset = getProtocolCharset();

		boolean hasPort = true;
		int from = 0;
		int next = original.indexOf('@');
		if (next != -1) { // a userinfo part (possibly empty) is present
			// each protocol extended from URI supports the specific userinfo
			_userinfo = (escaped) ? original.substring(0, next).toCharArray()
					: encode(original.substring(0, next), allowed_userinfo,
							charset);
			from = next + 1;
		}
		next = original.indexOf('[', from);
		if (next >= from) {
			// IPv6reference: the host runs up to and including the closing ']'
			next = original.indexOf(']', from);
			if (next == -1) {
				throw new URIException(URIException.PARSING, "IPv6reference");
			} else {
				next++;
			}
			_host = (escaped) ? original.substring(from, next).toCharArray()
					: encode(original.substring(from, next),
							allowed_IPv6reference, charset);
			// Set flag
			_is_IPv6reference = true;
		} else { // only for !_is_IPv6reference
			// the host ends at the first ':' (port separator) or at the end
			next = original.indexOf(':', from);
			if (next == -1) {
				next = original.length();
				hasPort = false;
			}
			// REMINDME: it doesn't need the pre-validation
			_host = original.substring(from, next).toCharArray();
			if (validate(_host, IPv4address)) {
				// Set flag
				_is_IPv4address = true;
			} else if (validate(_host, hostname)) {
				// Set flag
				_is_hostname = true;
			} else {
				// neither an IPv4 address nor a hostname: treat the whole
				// authority as a registry-based name
				_is_reg_name = true;
			}
		}
		if (_is_reg_name) {
			// Reset flags for a server-based naming authority
			_is_server = _is_hostname = _is_IPv4address = _is_IPv6reference = false;
			// set a registry-based naming authority
			if (escaped) {
				_authority = original.toCharArray();
				if (!validate(_authority, reg_name)) {
					throw new URIException("Invalid authority");
				}
			} else {
				_authority = encode(original, allowed_reg_name, charset);
			}
		} else {
			// a port is parsed only when at least one character follows the ':'
			if (original.length() - 1 > next && hasPort
					&& original.charAt(next) == ':') { // not empty
				from = next + 1;
				try {
					_port = Integer.parseInt(original.substring(from));
				} catch (NumberFormatException error) {
					throw new URIException(URIException.PARSING,
							"invalid port number");
				}
			}
			// set a server-based naming authority: [userinfo@]host[:port]
			StringBuffer buf = new StringBuffer();
			if (_userinfo != null) { // has_userinfo
				buf.append(_userinfo);
				buf.append('@');
			}
			if (_host != null) {
				buf.append(_host);
				if (_port != -1) {
					buf.append(':');
					buf.append(_port);
				}
			}
			_authority = buf.toString().toCharArray();
			// Set flag
			_is_server = true;
		}
	}

	/**
	 * Once it's parsed successfully, set this URI.
	 * 
	 * @see #getRawURI
	 */
	protected void setURI() {
		// Reassemble the raw URI from its parsed components, following
		// ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
		StringBuffer assembled = new StringBuffer();
		if (_scheme != null) {
			assembled.append(_scheme).append(':');
		}
		if (_is_net_path) {
			assembled.append("//");
			if (_authority != null) { // has_authority
				assembled.append(_authority);
			}
		}
		// an opaque part takes precedence over a hierarchical/relative path
		if (_is_opaque_part && _opaque != null) {
			assembled.append(_opaque);
		} else if (_path != null && _path.length != 0) {
			assembled.append(_path);
		}
		if (_query != null) { // has_query
			assembled.append('?').append(_query);
		}
		// the fragment identifier is deliberately left out of _uri
		_uri = assembled.toString().toCharArray();
		// clear the hash field (presumably a cached hash code -- confirm)
		hash = 0;
	}

	// ----------------------------------------------------------- Test methods

	/**
	 * Tell whether or not this URI is absolute.
	 * 
	 * @return true iif this URI is absoluteURI
	 */
	public boolean isAbsoluteURI() {
		// absolute means: a scheme component is present
		final boolean schemePresent = _scheme != null;
		return schemePresent;
	}

	/**
	 * Tell whether or not this URI is relative.
	 * 
	 * @return true iif this URI is relativeURI
	 */
	public boolean isRelativeURI() {
		// relative means: no scheme component is present
		final boolean schemeMissing = _scheme == null;
		return schemeMissing;
	}

	/**
	 * Tell whether or not the absoluteURI of this URI is hier_part.
	 * 
	 * @return true iif the absoluteURI is hier_part
	 */
	public boolean isHierPart() {
		// plain accessor: reports the current value of the _is_hier_part flag
		return _is_hier_part;
	}

	/**
	 * Tell whether or not the absoluteURI of this URI is opaque_part.
	 * 
	 * @return true iif the absoluteURI is opaque_part
	 */
	public boolean isOpaquePart() {
		// plain accessor: reports the current value of the _is_opaque_part flag
		return _is_opaque_part;
	}

	/**
	 * Tell whether or not the relativeURI or hier_part of this URI is net_path.
	 * It's the same function as the {@link #hasAuthority()} method.
	 * 
	 * @return true iff the relativeURI or hier_part is net_path
	 * @see #hasAuthority
	 */
	public boolean isNetPath() {
		// either the net_path flag is set ...
		if (_is_net_path) {
			return true;
		}
		// ... or an authority component exists
		return _authority != null;
	}

	/**
	 * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
	 * 
	 * @return true iif the relativeURI or hier_part is abs_path
	 */
	public boolean isAbsPath() {
		// plain accessor: reports the current value of the _is_abs_path flag
		return _is_abs_path;
	}

	/**
	 * Tell whether or not the relativeURI of this URI is rel_path.
	 * 
	 * @return true iif the relativeURI is rel_path
	 */
	public boolean isRelPath() {
		// plain accessor: reports the current value of the _is_rel_path flag
		return _is_rel_path;
	}

	/**
	 * Tell whether or not this URI has authority. It's the same function as the
	 * is_net_path() method.
	 * 
	 * @return true iif this URI has authority
	 * @see #isNetPath
	 */
	public boolean hasAuthority() {
		// an authority component exists ...
		if (_authority != null) {
			return true;
		}
		// ... or the URI is flagged as a net_path
		return _is_net_path;
	}

	/**
	 * Tell whether or not the authority component of this URI is reg_name.
	 * 
	 * @return true iif the authority component is reg_name
	 */
	public boolean isRegName() {
		// plain accessor: reports the current value of the _is_reg_name flag
		return _is_reg_name;
	}

	/**
	 * Tell whether or not the authority component of this URI is server.
	 * 
	 * @return true iif the authority component is server
	 */
	public boolean isServer() {
		// plain accessor: reports the current value of the _is_server flag
		return _is_server;
	}

	/**
	 * Tell whether or not this URI has userinfo.
	 * 
	 * @return true iif this URI has userinfo
	 */
	public boolean hasUserinfo() {
		// userinfo counts as present whenever the field is non-null
		final boolean present = _userinfo != null;
		return present;
	}

	/**
	 * Tell whether or not the host part of this URI is hostname.
	 * 
	 * @return true iif the host part is hostname
	 */
	public boolean isHostname() {
		// plain accessor: reports the current value of the _is_hostname flag
		return _is_hostname;
	}

	/**
	 * Tell whether or not the host part of this URI is IPv4address.
	 * 
	 * @return true iif the host part is IPv4address
	 */
	public boolean isIPv4address() {
		// plain accessor: reports the current value of the _is_IPv4address flag
		return _is_IPv4address;
	}

	/**
	 * Tell whether or not the host part of this URI is IPv6reference.
	 * 
	 * @return true iif the host part is IPv6reference
	 */
	public boolean isIPv6reference() {
		// plain accessor: reports the current value of the _is_IPv6reference
		// flag
		return _is_IPv6reference;
	}

	/**
	 * Tell whether or not this URI has query.
	 * 
	 * @return true iif this URI has query
	 */
	public boolean hasQuery() {
		// a query counts as present whenever the field is non-null
		final boolean present = _query != null;
		return present;
	}

	/**
	 * Tell whether or not this URI has fragment.
	 * 
	 * @return true iif this URI has fragment
	 */
	public boolean hasFragment() {
		// a fragment counts as present whenever the field is non-null
		final boolean present = _fragment != null;
		return present;
	}

	// ---------------------------------------------------------------- Charset

	/**
	 * Set the default charset of the protocol.
	 * <p>
	 * The character set used to store files SHALL remain a local decision and
	 * MAY depend on the capability of local operating systems. Prior to the
	 * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format and
	 * UTF-8 encoded. This approach, while allowing international exchange of
	 * URIs, will still allow backward compatibility with older systems because
	 * the code set positions for ASCII characters are identical to the one byte
	 * sequence in UTF-8.
	 * <p>
	 * An individual URI scheme may require a single charset, define a default
	 * charset, or provide a way to indicate the charset used.
	 * 
	 * <p>
	 * This setter always succeeds in changing the charset and then always
	 * throws a <code>DefaultCharsetChanged</code> exception.
	 * 
	 * So API programmer must follow the following way: <code><pre>
	 *  import org.apache.util.URI$DefaultCharsetChanged;
	 *      .
	 *      .
	 *      .
	 *  try {
	 *      URI.setDefaultProtocolCharset("UTF-8");
	 *  } catch (DefaultCharsetChanged cc) {
	 *      // CASE 1: the exception could be ignored, when it is set by user
	 *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
	 *      // CASE 2: let user know the default protocol charset changed
	 *      } else {
	 *      // CASE 2: let user know the default document charset changed
	 *      }
	 *  }
	 *  </pre></code>
	 * 
	 * The API programmer is responsible to set the correct charset. And each
	 * application should remember its own charset to support.
	 * 
	 * @param charset
	 *            the default charset for each protocol
	 * @throws DefaultCharsetChanged
	 *             default charset changed
	 */
	public static void setDefaultProtocolCharset(String charset)
			throws DefaultCharsetChanged {

		// the assignment always takes effect ...
		defaultProtocolCharset = charset;
		// ... and the change is then unconditionally signalled to the caller
		DefaultCharsetChanged changed = new DefaultCharsetChanged(
				DefaultCharsetChanged.PROTOCOL_CHARSET,
				"the default protocol charset changed");
		throw changed;
	}

	/**
	 * Get the default charset of the protocol.
	 * <p>
	 * An individual URI scheme may require a single charset, define a default
	 * charset, or provide a way to indicate the charset used.
	 * <p>
	 * To work globally either requires support of a number of character sets
	 * and to be able to convert between them, or the use of a single preferred
	 * character set. For support of global compatibility it is STRONGLY
	 * RECOMMENDED that clients and servers use UTF-8 encoding when exchanging
	 * URIs.
	 * 
	 * @return the default charset string
	 */
	public static String getDefaultProtocolCharset() {
		// plain accessor for the static defaultProtocolCharset field
		return defaultProtocolCharset;
	}

	/**
	 * Get the protocol charset used by this current URI instance. It was set by
	 * the constructor for this instance. If it was not set by contructor, it
	 * will return the default protocol charset.
	 * 
	 * @return the protocol charset string
	 * @see #getDefaultProtocolCharset
	 */
	public String getProtocolCharset() {
		// without an instance-specific charset, fall back to the default one
		if (protocolCharset == null) {
			return defaultProtocolCharset;
		}
		return protocolCharset;
	}

	/**
	 * Set the default charset of the document.
	 * <p>
	 * Notice that it will be possible to contain mixed characters (e.g.
	 * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
	 * display of these character sets, the protocol charset could be simply
	 * used again. Because it's not yet implemented that the insertion of BIDI
	 * control characters at different points during composition is extracted.
	 * <p>
	 * 
	 * This setter always succeeds in changing the charset and then always
	 * throws a <code>DefaultCharsetChanged</code> exception.
	 * 
	 * So API programmer must follow the following way: <code><pre>
	 *  import org.apache.util.URI$DefaultCharsetChanged;
	 *      .
	 *      .
	 *      .
	 *  try {
	 *      URI.setDefaultDocumentCharset("EUC-KR");
	 *  } catch (DefaultCharsetChanged cc) {
	 *      // CASE 1: the exception could be ignored, when it is set by user
	 *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
	 *      // CASE 2: let user know the default document charset changed
	 *      } else {
	 *      // CASE 2: let user know the default protocol charset changed
	 *      }
	 *  }
	 *  </pre></code>
	 * 
	 * The API programmer is responsible to set the correct charset. And each
	 * application should remember its own charset to support.
	 * 
	 * @param charset
	 *            the default charset for the document
	 * @throws DefaultCharsetChanged
	 *             default charset changed
	 */
	public static void setDefaultDocumentCharset(String charset)
			throws DefaultCharsetChanged {

		// the assignment always takes effect ...
		defaultDocumentCharset = charset;
		// ... and the change is then unconditionally signalled to the caller
		DefaultCharsetChanged changed = new DefaultCharsetChanged(
				DefaultCharsetChanged.DOCUMENT_CHARSET,
				"the default document charset changed");
		throw changed;
	}

	/**
	 * Get the recommended default charset of the document.
	 * 
	 * @return the default charset string
	 */
	public static String getDefaultDocumentCharset() {
		// plain accessor for the static defaultDocumentCharset field
		return defaultDocumentCharset;
	}

	/**
	 * Get the default charset of the document by locale.
	 * 
	 * @return the default charset string by locale
	 */
	public static String getDefaultDocumentCharsetByLocale() {
		// plain accessor for the static defaultDocumentCharsetByLocale field
		return defaultDocumentCharsetByLocale;
	}

	/**
	 * Get the default charset of the document by platform.
	 * 
	 * @return the default charset string by platform
	 */
	public static String getDefaultDocumentCharsetByPlatform() {
		// plain accessor for the static defaultDocumentCharsetByPlatform field
		return defaultDocumentCharsetByPlatform;
	}

	// ------------------------------------------------------------- The scheme

	/**
	 * Get the scheme.
	 * 
	 * @return the scheme
	 */
	public char[] getRawScheme() {
		// Hands out the internal array itself (no copy is made); null when no
		// scheme is set.
		return _scheme;
	}

	/**
	 * Get the scheme.
	 * 
	 * @return the scheme null if undefined scheme
	 */
	public String getScheme() {
		// no scheme component: report null rather than an empty string
		if (_scheme == null) {
			return null;
		}
		return new String(_scheme);
	}

	// ---------------------------------------------------------- The authority

	/**
	 * Set the authority. It can be one type of server, hostport, hostname,
	 * IPv4address, IPv6reference and reg_name.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * authority = server | reg_name
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @param escapedAuthority
	 *            the raw escaped authority
	 * @throws URIException
	 *             If {@link #parseAuthority(java.lang.String,boolean)} fails
	 * @throws NullPointerException
	 *             null authority
	 */
	public void setRawAuthority(char[] escapedAuthority) throws URIException,
			NullPointerException {

		// the String constructor rejects a null array with a
		// NullPointerException
		String authority = new String(escapedAuthority);
		// the input is already escaped, so it is parsed without re-encoding
		parseAuthority(authority, true);
		// rebuild the cached raw URI from the updated components
		setURI();
	}

	/**
	 * Set the authority. It can be one type of server, hostport, hostname,
	 * IPv4address, IPv6reference and reg_name. Note that there is no
	 * setAuthority method by the escape encoding reason.
	 * 
	 * @param escapedAuthority
	 *            the escaped authority string
	 * @throws URIException
	 *             If {@link #parseAuthority(java.lang.String,boolean)} fails
	 */
	public void setEscapedAuthority(String escapedAuthority)
			throws URIException {

		// the input is already escaped, so it is parsed without re-encoding
		final boolean alreadyEscaped = true;
		parseAuthority(escapedAuthority, alreadyEscaped);
		// rebuild the cached raw URI from the updated components
		setURI();
	}

	/**
	 * Get the raw-escaped authority.
	 * 
	 * @return the raw-escaped authority
	 */
	public char[] getRawAuthority() {
		// Hands out the internal array itself (no copy is made); null when no
		// authority is set.
		return _authority;
	}

	/**
	 * Get the escaped authority.
	 * 
	 * @return the escaped authority
	 */
	public String getEscapedAuthority() {
		// no authority component: report null
		if (_authority == null) {
			return null;
		}
		return new String(_authority);
	}

	/**
	 * Get the authority.
	 * 
	 * @return the authority
	 * @throws URIException
	 *             If {@link #decode} fails
	 */
	public String getAuthority() throws URIException {
		// no authority component: nothing to decode
		if (_authority == null) {
			return null;
		}
		return decode(_authority, getProtocolCharset());
	}

	// ----------------------------------------------------------- The userinfo

	/**
	 * Get the raw-escaped userinfo.
	 * 
	 * @return the raw-escaped userinfo
	 * @see #getAuthority
	 */
	public char[] getRawUserinfo() {
		// Hands out the internal array itself (no copy is made); null when no
		// userinfo is set.
		return _userinfo;
	}

	/**
	 * Get the escaped userinfo.
	 * 
	 * @return the escaped userinfo
	 * @see #getAuthority
	 */
	public String getEscapedUserinfo() {
		// no userinfo component: report null
		if (_userinfo == null) {
			return null;
		}
		return new String(_userinfo);
	}

	/**
	 * Get the userinfo.
	 * 
	 * @return the userinfo
	 * @throws URIException
	 *             If {@link #decode} fails
	 * @see #getAuthority
	 */
	public String getUserinfo() throws URIException {
		// no userinfo component: nothing to decode
		if (_userinfo == null) {
			return null;
		}
		return decode(_userinfo, getProtocolCharset());
	}

	// --------------------------------------------------------------- The host

	/**
	 * Get the host.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * host = hostname | IPv4address | IPv6reference
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @return the host
	 * @see #getAuthority
	 */
	public char[] getRawHost() {
		// Hands out the internal array itself (no copy is made); null when no
		// host is set.
		return _host;
	}

	/**
	 * Get the host.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 * host = hostname | IPv4address | IPv6reference
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @return the host
	 * @throws URIException
	 *             If {@link #decode} fails
	 * @see #getAuthority
	 */
	public String getHost() throws URIException {
		// decode only when a host is present; otherwise report null
		return (_host == null) ? null : decode(_host, getProtocolCharset());
	}

	// --------------------------------------------------------------- The port

	/**
	 * Get the port. In order to get the specfic default port, the specific
	 * protocol-supported class extended from the URI class should be used. It
	 * has the server-based naming authority.
	 * 
	 * @return the port if -1, it has the default port for the scheme or the
	 *         server-based naming authority is not supported in the specific
	 *         URI.
	 */
	public int getPort() {
		// plain accessor for the _port field; the documented "no explicit
		// port" value is -1
		return _port;
	}

	// --------------------------------------------------------------- The path

	/**
	 * Set the raw-escaped path.
	 * 
	 * @param escapedPath
	 *            the path character sequence
	 * @throws URIException
	 *             encoding error or not proper for initial instance
	 * @see #encode
	 */
	public void setRawPath(char[] escapedPath) throws URIException {
		// Remove the fragment identifier before the emptiness check: a value
		// such as "#top" is empty once stripped, and the branches below read
		// escapedPath[0].
		escapedPath = removeFragmentIdentifier(escapedPath);
		if (escapedPath == null || escapedPath.length == 0) {
			// null or empty clears both the hierarchical and the opaque path
			_path = _opaque = escapedPath;
			setURI();
			return;
		}
		if (_is_net_path || _is_abs_path) {
			if (escapedPath[0] != '/') {
				throw new URIException(URIException.PARSING,
						"not absolute path");
			}
			if (!validate(escapedPath, abs_path)) {
				throw new URIException(URIException.ESCAPING,
						"escaped absolute path not valid");
			}
			_path = escapedPath;
		} else if (_is_rel_path) {
			int at = indexFirstOf(escapedPath, '/');
			if (at == 0) {
				throw new URIException(URIException.PARSING, "incorrect path");
			}
			// rel_path = rel_segment [ abs_path ]: the leading segment and,
			// when present, the remainder starting at the first '/' must BOTH
			// be valid for the path to be accepted.
			boolean valid;
			if (at > 0) {
				valid = validate(escapedPath, 0, at - 1, rel_segment)
						&& validate(escapedPath, at, -1, abs_path);
			} else {
				valid = validate(escapedPath, 0, -1, rel_segment);
			}
			if (!valid) {
				throw new URIException(URIException.ESCAPING,
						"escaped relative path not valid");
			}
			_path = escapedPath;
		} else if (_is_opaque_part) {
			// opaque_part = uric_no_slash *uric: reject when EITHER the first
			// character or the remainder is not allowed.
			if (!uric_no_slash.get(escapedPath[0])
					|| !validate(escapedPath, 1, -1, uric)) {
				throw new URIException(URIException.ESCAPING,
						"escaped opaque part not valid");
			}
			_opaque = escapedPath;
		} else {
			throw new URIException(URIException.PARSING, "incorrect path");
		}
		// rebuild the cached raw URI from the updated components
		setURI();
	}

	/**
	 * Set the escaped path.
	 * 
	 * @param escapedPath
	 *            the escaped path string
	 * @throws URIException
	 *             encoding error or not proper for initial instance
	 * @see #encode
	 */
	public void setEscapedPath(String escapedPath) throws URIException {
		// a real string is handed to the char[] based setter
		if (escapedPath != null) {
			setRawPath(escapedPath.toCharArray());
			return;
		}
		// null clears both the hierarchical and the opaque path
		_path = _opaque = null;
		setURI();
	}

	/**
	 * Set the path.
	 * 
	 * @param path
	 *            the path string
	 * @throws URIException
	 *             set incorrectly or fragment only
	 * @see #encode
	 */
	public void setPath(String path) throws URIException {

		if (path == null || path.length() == 0) {
			// null or empty clears both the hierarchical and the opaque path
			_path = _opaque = (path == null) ? null : path.toCharArray();
			setURI();
			return;
		}
		// set the charset to do escape encoding
		String charset = getProtocolCharset();

		if (_is_net_path || _is_abs_path) {
			_path = encode(path, allowed_abs_path, charset);
		} else if (_is_rel_path) {
			StringBuffer buff = new StringBuffer(path.length());
			int at = path.indexOf('/');
			if (at == 0) { // a relative path must not start with '/'
				throw new URIException(URIException.PARSING,
						"incorrect relative path");
			}
			if (at > 0) {
				// first segment and the remainder use different allowed sets
				buff.append(encode(path.substring(0, at), allowed_rel_path,
						charset));
				buff.append(encode(path.substring(at), allowed_abs_path,
						charset));
			} else {
				buff.append(encode(path, allowed_rel_path, charset));
			}
			_path = buff.toString().toCharArray();
		} else if (_is_opaque_part) {
			// opaque_part = uric_no_slash *uric. The two encoded pieces are
			// appended in order; inserting the second piece at a fixed
			// offset of 1 would land inside the "%XX" triplet whenever the
			// first character had to be escaped.
			StringBuffer buf = new StringBuffer();
			buf.append(encode(path.substring(0, 1), uric_no_slash, charset));
			buf.append(encode(path.substring(1), uric, charset));
			_opaque = buf.toString().toCharArray();
		} else {
			throw new URIException(URIException.PARSING, "incorrect path");
		}
		// rebuild the cached raw URI from the updated components
		setURI();
	}

	/**
	 * Resolve the base and relative path.
	 * 
	 * @param basePath
	 *            a character array of the basePath
	 * @param relPath
	 *            a character array of the relPath
	 * @return the resolved path
	 * @throws URIException
	 *             no more higher path level to be resolved
	 */
	protected char[] resolvePath(char[] basePath, char[] relPath)
			throws URIException {

		// an empty relative path resolves to the (normalized) base path
		if (relPath == null || relPath.length == 0) {
			return normalize(basePath);
		}
		// a relative path starting with '/' replaces the base path entirely
		if (relPath[0] == '/') {
			return normalize(relPath);
		}
		// Otherwise keep the base up to and including its last '/', or use
		// "/" when the base has no slash at all, and append the relative
		// path.
		String base = (basePath == null) ? "" : new String(basePath);
		int at = base.lastIndexOf('/');
		StringBuffer buff = new StringBuffer(base.length() + relPath.length);
		buff.append((at != -1) ? base.substring(0, at + 1) : "/");
		buff.append(relPath);
		return normalize(buff.toString().toCharArray());
	}

	/**
	 * Get the raw-escaped current hierarchy level in the given path. If the
	 * last namespace is a collection, the slash mark ('/') should be ended with
	 * at the last character of the path string.
	 * 
	 * @param path
	 *            the path
	 * @return the current hierarchy level
	 * @throws URIException
	 *             no hierarchy level
	 */
	protected char[] getRawCurrentHierPath(char[] path) throws URIException {

		// an opaque URI has no hierarchy to walk
		if (_is_opaque_part) {
			throw new URIException(URIException.PARSING, "no hierarchy level");
		}
		if (path == null) {
			throw new URIException(URIException.PARSING, "empty path");
		}
		String text = new String(path);
		int lastSlash = text.lastIndexOf('/');
		// the only slash is the leading one: the level is the root
		if (lastSlash == 0) {
			return rootPath;
		}
		// at least two distinct slashes: cut off everything from the last one
		if (lastSlash != -1 && text.indexOf('/') != lastSlash) {
			return text.substring(0, lastSlash).toCharArray();
		}
		// FIXME: it could be a document on the server side
		return path;
	}

	/**
	 * Get the raw-escaped current hierarchy level.
	 * 
	 * @return the raw-escaped current hierarchy level
	 * @throws URIException
	 *             If {@link #getRawCurrentHierPath(char[])} fails.
	 */
	public char[] getRawCurrentHierPath() throws URIException {
		// without a path there is no hierarchy level to report
		if (_path == null) {
			return null;
		}
		return getRawCurrentHierPath(_path);
	}

	/**
	 * Get the escaped current hierarchy level.
	 * 
	 * @return the escaped current hierarchy level
	 * @throws URIException
	 *             If {@link #getRawCurrentHierPath(char[])} fails.
	 */
	public String getEscapedCurrentHierPath() throws URIException {
		char[] level = getRawCurrentHierPath();
		// keep null as null, otherwise expose the raw characters as a String
		if (level == null) {
			return null;
		}
		return new String(level);
	}

	/**
	 * Get the current hierarchy level.
	 * 
	 * @return the current hierarchy level
	 * @throws URIException
	 *             If {@link #getRawCurrentHierPath(char[])} fails.
	 * @see #decode
	 */
	public String getCurrentHierPath() throws URIException {
		char[] level = getRawCurrentHierPath();
		// keep null as null, otherwise decode with the protocol charset
		if (level == null) {
			return null;
		}
		return decode(level, getProtocolCharset());
	}

	/**
	 * Get the level above the this hierarchy level.
	 * 
	 * @return the raw above hierarchy level
	 * @throws URIException
	 *             If {@link #getRawCurrentHierPath(char[])} fails.
	 */
	public char[] getRawAboveHierPath() throws URIException {
		char[] current = getRawCurrentHierPath();
		if (current == null) {
			return null;
		}
		// applying the same step to the current level yields the one above
		return getRawCurrentHierPath(current);
	}

	/**
	 * Get the level above the this hierarchy level.
	 * 
	 * @return the escaped above hierarchy level
	 * @throws URIException
	 *             If {@link #getRawCurrentHierPath(char[])} fails.
	 */
	public String getEscapedAboveHierPath() throws URIException {
		char[] above = getRawAboveHierPath();
		// keep null as null, otherwise expose the raw characters as a String
		if (above == null) {
			return null;
		}
		return new String(above);
	}

	/**
	 * Get the level above the this hierarchy level.
	 * 
	 * @return the above hierarchy level
	 * @throws URIException
	 *             If {@link #getRawCurrentHierPath(char[])} fails.
	 * @see #decode
	 */
	public String getAboveHierPath() throws URIException {
		char[] above = getRawAboveHierPath();
		// keep null as null, otherwise decode with the protocol charset
		if (above == null) {
			return null;
		}
		return decode(above, getProtocolCharset());
	}

	/**
	 * Get the raw-escaped path.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   path          = [ abs_path | opaque_part ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @return the raw-escaped path
	 */
	public char[] getRawPath() {
		// an opaque URI reports its opaque part in place of a path
		if (_is_opaque_part) {
			return _opaque;
		}
		return _path;
	}

	/**
	 * Get the escaped path.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   path          = [ abs_path | opaque_part ]
	 *   abs_path      = "/"  path_segments 
	 *   opaque_part   = uric_no_slash *uric
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @return the escaped path string
	 */
	public String getEscapedPath() {
		char[] raw = getRawPath();
		// keep null as null, otherwise expose the raw characters as a String
		if (raw == null) {
			return null;
		}
		return new String(raw);
	}

	/**
	 * Get the path.
	 * <p>
	 * <blockquote>
	 * 
	 * <pre>
	 *   path          = [ abs_path | opaque_part ]
	 * </pre>
	 * 
	 * </blockquote>
	 * <p>
	 * 
	 * @return the path string
	 * @throws URIException
	 *             If {@link #decode} fails.
	 * @see #decode
	 */
	public String getPath() throws URIException {
		char[] raw = getRawPath();
		// keep null as null, otherwise decode with the protocol charset
		if (raw == null) {
			return null;
		}
		return decode(raw, getProtocolCharset());
	}

	/**
	 * Get the raw-escaped basename of the path.
	 * 
	 * @return the raw-escaped basename
	 */
	public char[] getRawName() {
		if (_path == null) {
			return null;
		}

		// scan backwards for the last '/'; ends at -1 when there is none
		int slash = _path.length - 1;
		while (slash >= 0 && _path[slash] != '/') {
			slash--;
		}
		// the basename starts right after that slash (index 0 without one)
		int start = slash + 1;
		char[] basename = new char[_path.length - start];
		System.arraycopy(_path, start, basename, 0, basename.length);
		return basename;
	}

	/**
	 * Get the escaped basename of the path.
	 * 
	 * @return the escaped basename string
	 */
	public String getEscapedName() {
		char[] raw = getRawName();
		// keep null as null, otherwise expose the raw characters as a String
		if (raw == null) {
			return null;
		}
		return new String(raw);
	}

	/**
	 * Get the basename of the path.
	 * 
	 * @return the basename string
	 * @throws URIException
	 *             incomplete trailing escape pattern or unsupported character
	 *             encoding
	 * @see #decode
	 */
	public String getName() throws URIException {
		// Compute the raw basename once and decode that very array, so the
		// value that was null-checked is the value that gets decoded and the
		// path is not scanned and copied a second time.
		char[] basename = getRawName();
		return (basename == null) ? null : decode(basename,
				getProtocolCharset());
	}

	// ----------------------------------------------------- The path and query

	/**
	 * Get the raw-escaped path and query.
	 * 
	 * @return the raw-escaped path and query
	 */
	public char[] getRawPathQuery() {

		final boolean pathPresent = _path != null;
		final boolean queryPresent = _query != null;
		// neither component exists: there is nothing to join
		if (!pathPresent && !queryPresent) {
			return null;
		}
		StringBuffer joined = new StringBuffer();
		if (pathPresent) {
			joined.append(_path);
		}
		if (queryPresent) {
			// the query is always introduced by '?', even without a path
			joined.append('?').append(_query);
		}
		return joined.toString().toCharArray();
	}

	/**
	 * Get the escaped path and query.
	 * 
	 * @return the escaped path and query string
	 */
	public String getEscapedPathQuery() {
		char[] raw = getRawPathQuery();
		// keep null as null, otherwise expose the raw characters as a String
		if (raw == null) {
			return null;
		}
		return new String(raw);
	}

	/**
	 * Get the path and query.
	 * 
	 * @return the path and query string.
	 * @throws URIException
	 *             incomplete trailing escape pattern or unsupported character
	 *             encoding
	 * @see #decode
	 */
	public String getPathQuery() throws URIException {
		char[] raw = getRawPathQuery();
		// keep null as null, otherwise decode with the protocol charset
		if (raw == null) {
			return null;
		}
		return decode(raw, getProtocolCharset());
	}

	// -------------------------------------------------------------- The query

	/**
	 * Set the raw-escaped query.
	 * 
	 * @param escapedQuery
	 *            the raw-escaped query
	 * @throws URIException
	 *             escaped query not valid
	 */
	public void setRawQuery(char[] escapedQuery) throws URIException {
		// a null or empty query is stored as is; anything else is stripped of
		// its fragment identifier and validated first
		if (escapedQuery != null && escapedQuery.length != 0) {
			escapedQuery = removeFragmentIdentifier(escapedQuery);
			if (!validate(escapedQuery, query)) {
				throw new URIException(URIException.ESCAPING,
						"escaped query not valid");
			}
		}
		_query = escapedQuery;
		// rebuild the cached raw URI from the updated components
		setURI();
	}

	/**
	 * Set the escaped query string.
	 * 
	 * @param escapedQuery
	 *            the escaped query string
	 * @throws URIException
	 *             escaped query not valid
	 */
	public void setEscapedQuery(String escapedQuery) throws URIException {
		// a real string is handed to the char[] based setter
		if (escapedQuery != null) {
			setRawQuery(escapedQuery.toCharArray());
			return;
		}
		// null removes the query component
		_query = null;
		setURI();
	}

	/**
	 * Set the query.
	 * <p>
	 * When a query string is not misunderstood the reserved special characters
	 * ("&amp;", "=", "+", ",", and "$") within a query component, it is
	 * recommended to use in encoding the whole query with this method.
	 * <p>
	 * The additional APIs for the special purpose using by the reserved special
	 * characters used in each protocol are implemented in each protocol classes
	 * inherited from <code>URI</code>. So refer to the same-named APIs
	 * implemented in each specific protocol instance.
	 * 
	 * @param query
	 *            the query string.
	 * @throws URIException
	 *             incomplete trailing escape pattern or unsupported character
	 *             encoding
	 * @see #encode
	 */
	public void setQuery(String query) throws URIException {
		// a non-empty query is escape-encoded and then stored through the
		// raw setter
		if (query != null && query.length() != 0) {
			setRawQuery(encode(query, allowed_query, getProtocolCharset()));
			return;
		}
		// null stays null; an empty string becomes an empty array
		_query = (query == null) ? null : query.toCharArray();
		setURI();
	}

	/**
	 * Get the raw-escaped query.
	 * 
	 * @return the raw-escaped query
	 */
	public char[] getRawQuery() {
		// Hands out the internal array itself (no copy is made); null when no
		// query is set.
		return _query;
	}

	/**
	 * Get the escaped query.
	 * 
	 * @return the escaped query string
	 */
	public String getEscapedQuery() {
		// no query component: report null
		if (_query == null) {
			return null;
		}
		return new String(_query);
	}

	/**
	 * Get the query.
	 * 
	 * @return the query string.
	 * @throws URIException
	 *             incomplete trailing escape pattern or unsupported character
	 *             encoding
	 * @see #decode
	 */
	public String getQuery() throws URIException {
		// no query component: nothing to decode
		if (_query == null) {
			return null;
		}
		return decode(_query, getProtocolCharset());
	}

	// ----------------------------------------------------------- The fragment

	/**
	 * Set the raw-escaped fragment.
	 * 
	 * @param escapedFragment
	 *            the raw-escaped fragment
	 * @throws URIException
	 *             escaped fragment not valid
	 */
	public void setRawFragment(char[] escapedFragment) throws URIException {
		// only a non-empty fragment needs validating; null and empty values
		// are stored as they are
		final boolean blank = escapedFragment == null
				|| escapedFragment.length == 0;
		if (!blank && !validate(escapedFragment, fragment)) {
			throw new URIException(URIException.ESCAPING,
					"escaped fragment not valid");
		}
		_fragment = escapedFragment;
		// clear the hash field after the change
		hash = 0;
	}

	/**
	 * Set the escaped fragment string.
	 * 
	 * @param escapedFragment
	 *            the escaped fragment string
	 * @throws URIException
	 *             escaped fragment not valid
	 */
	public void setEscapedFragment(String escapedFragment) throws URIException {
		// a real string is handed to the char[] based setter
		if (escapedFragment != null) {
			setRawFragment(escapedFragment.toCharArray());
			return;
		}
		// null removes the fragment
		_fragment = null;
		hash = 0;
	}

	/**
	 * Set the fragment.
	 * 
	 * @param fragment
	 *            the fragment string.
	 * @throws URIException
	 *             If an error occurs.
	 */
	public void setFragment(String fragment) throws URIException {
		if (fragment == null) {
			// null removes the fragment
			_fragment = null;
		} else if (fragment.length() == 0) {
			// an empty string is kept as an empty array
			_fragment = fragment.toCharArray();
		} else {
			// anything else is escape-encoded with the protocol charset
			_fragment = encode(fragment, allowed_fragment,
					getProtocolCharset());
		}
		// clear the hash field after the change
		hash = 0;
	}

	/**
	 * Return the fragment in its raw-escaped form.
	 * <p>
	 * The fragment identifier is optional. It is not part of a URI proper, but
	 * is commonly used together with one.
	 * <p>
	 * How a fragment identifier is formatted and interpreted depends on the
	 * media type [RFC2046] of the retrieval result.
	 * <p>
	 * It only carries meaning when the URI reference is meant for retrieval
	 * and the retrieved document consistently defines the identified fragment.
	 * 
	 * @return the raw-escaped fragment, or <code>null</code> if none is set;
	 *         this is the internally stored array, not a copy
	 */
	public char[] getRawFragment() {
		return this._fragment;
	}

	/**
	 * Return the fragment as an escaped string.
	 * 
	 * @return the escaped fragment string, or <code>null</code> if no
	 *         fragment is set
	 */
	public String getEscapedFragment() {
		if (_fragment == null) {
			return null;
		}
		return new String(_fragment);
	}

	/**
	 * Return the fragment in unescaped form, decoded with the protocol
	 * charset.
	 * 
	 * @return the decoded fragment string, or <code>null</code> if no
	 *         fragment is set
	 * @throws URIException
	 *             incomplete trailing escape pattern or unsupported character
	 *             encoding
	 * @see #decode
	 */
	public String getFragment() throws URIException {
		if (_fragment == null) {
			return null;
		}
		return decode(_fragment, getProtocolCharset());
	}

	// ------------------------------------------------------------- Utilities

	/**
	 * Cut a component off at its first '#' character.
	 * 
	 * @param component
	 *            the component that may carry a fragment identifier, may be
	 *            <code>null</code>
	 * @return <code>null</code> for a <code>null</code> argument; the given
	 *         array itself when it contains no '#'; otherwise a new array
	 *         holding the characters in front of the first '#'
	 */
	protected char[] removeFragmentIdentifier(char[] component) {
		if (component == null) {
			return null;
		}
		for (int pos = 0; pos < component.length; pos++) {
			if (component[pos] == '#') {
				// Keep only what precedes the first '#'.
				char[] withoutFragment = new char[pos];
				System.arraycopy(component, 0, withoutFragment, 0, pos);
				return withoutFragment;
			}
		}
		// No '#' present: hand back the caller's array untouched.
		return component;
	}

	/**
	 * Normalize the given hier path part by collapsing "." and ".." path
	 * segments.
	 * 
	 * <p>
	 * Algorithm taken from URI reference parser at
	 * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
	 * 
	 * <p>
	 * The steps run in this order: a leading "." or ".." is dropped, every
	 * "/./" and a trailing "/." are collapsed, every "/segment/../" and a
	 * trailing "/segment/.." are collapsed, and finally any "/../" that has no
	 * slash in front of it is cut off together with the text before it.
	 * 
	 * <p>
	 * NOTE(review): no statement in this method body throws
	 * <code>URIException</code>; the <code>throws</code> clause is presumably
	 * kept for callers and overriding subclasses - confirm before removing.
	 * 
	 * @param path
	 *            the path to normalize, may be <code>null</code>
	 * @return the normalized path as a newly created array, or
	 *         <code>null</code> if <code>path</code> is <code>null</code>
	 * @throws URIException
	 *             declared as "no more higher path level to be normalized",
	 *             but not raised by the code in this method
	 */
	protected char[] normalize(char[] path) throws URIException {

		if (path == null) {
			return null;
		}

		// Work on a String copy; the caller's array is never modified.
		String normalized = new String(path);

		// If the buffer begins with "./" or "../", the "." or ".." is removed,
		// leaving the slash in place.
		// The last branch strips the two leading dots from any other buffer
		// that merely starts with ".." (for example a bare "..").
		if (normalized.startsWith("./")) {
			normalized = normalized.substring(1);
		} else if (normalized.startsWith("../")) {
			normalized = normalized.substring(2);
		} else if (normalized.startsWith("..")) {
			normalized = normalized.substring(2);
		}

		// All occurrences of "/./" in the buffer are replaced with "/"
		// (index + 2 is the second slash of "/./", which is kept).
		int index = -1;
		while ((index = normalized.indexOf("/./")) != -1) {
			normalized = normalized.substring(0, index)
					+ normalized.substring(index + 2);
		}

		// If the buffer ends with "/.", the "." is removed; the slash stays.
		if (normalized.endsWith("/.")) {
			normalized = normalized.substring(0, normalized.length() - 1);
		}

		// Search position for the loop below. It only moves forward, past a
		// "/../" that could not be collapsed.
		int startIndex = 0;

		// All occurrences of "/<segment>/../" in the buffer, where ".."
		// and <segment> are complete path segments, are iteratively replaced
		// with "/" in order from left to right until no matching pattern
		// remains.
		// If the buffer ends with "/<segment>/..", that is also replaced
		// with "/". Note that <segment> may be empty.
		//
		// slashIndex is the slash that opens <segment>. The text from that
		// slash up to, but not including, the closing slash of "/../" (at
		// index + 3) is removed. If no slash precedes the "/../", the
		// occurrence is left alone and the search resumes at its closing
		// slash.
		while ((index = normalized.indexOf("/../", startIndex)) != -1) {
			int slashIndex = normalized.lastIndexOf('/', index - 1);
			if (slashIndex >= 0) {
				normalized = normalized.substring(0, slashIndex)
						+ normalized.substring(index + 3);
			} else {
				startIndex = index + 3;
			}
		}
		// Trailing "/<segment>/..": length() - 4 is the character just before
		// the final "/..", so the search finds the slash that opens
		// <segment>. Everything after that slash is dropped.
		if (normalized.endsWith("/..")) {
			int slashIndex = normalized.lastIndexOf('/',
					normalized.length() - 4);
			if (slashIndex >= 0) {
				normalized = normalized.substring(0, slashIndex + 1);
			}
		}

		// Second pass over whatever "/../" is still present. While the first
		// remaining "/../" has no slash in front of it, everything before its
		// closing slash is discarded, so the buffer then starts with that
		// slash. The loop stops at the first occurrence that does have a
		// slash in front of it.
		while ((index = normalized.indexOf("/../")) != -1) {
			int slashIndex = normalized.lastIndexOf('/', index - 1);
			if (slashIndex >= 0) {
				break;
			} else {
				normalized = normalized.substring(index + 3);
			}
		}
		// A trailing "/.." with no slash before it: the whole buffer becomes
		// "/".
		if (normalized.endsWith("/..")) {
			int slashIndex = normalized.lastIndexOf('/',
					normalized.length() - 4);
			if (slashIndex < 0) {
				normalized = "/";
			}
		}

		return normalized.toCharArray();
	}

	/**
	 * Normalize the path component of this URI in place.
	 * <p>
	 * Only URIs for which {@link #isAbsPath()} is true are touched. For them
	 * the path is replaced by its normalized form and the URI is rebuilt via
	 * <code>setURI()</code>. For every other URI this call does nothing.
	 * 
	 * @throws URIException
	 *             no more higher path level to be normalized
	 * 
	 * @see #isAbsPath()
	 */
	public void normalize() throws URIException {
		if (!isAbsPath()) {
			return;
		}
		_path = normalize(_path);
		setURI();
	}

	/**
	 * Compare two character arrays element by element.
	 * <p>
	 * Two <code>null</code> arrays count as equal; a <code>null</code> array
	 * never equals a non-null one.
	 * 
	 * @param first
	 *            the first character array, may be <code>null</code>
	 * @param second
	 *            the second character array, may be <code>null</code>
	 * @return true if both are <code>null</code>, or both have the same
	 *         length and the same characters in the same order
	 */
	protected boolean equals(char[] first, char[] second) {
		// Arrays.equals has exactly this null, length and element contract.
		return Arrays.equals(first, second);
	}

	/**
	 * Test whether the given object is a URI with the same components as this
	 * one.
	 * <p>
	 * The raw scheme, opaque part, authority, path, query and fragment are
	 * compared as stored; no normalization is applied here.
	 * 
	 * @param obj
	 *            an object to compare
	 * @return true if <code>obj</code> is this instance, or a URI whose six
	 *         components all equal the ones of this URI
	 */
	public boolean equals(Object obj) {
		if (obj == this) {
			return true;
		}
		if (!(obj instanceof URI)) {
			return false;
		}
		URI other = (URI) obj;
		// Evaluated left to right; the first differing component ends the
		// comparison.
		return equals(_scheme, other._scheme)
				&& equals(_opaque, other._opaque)
				&& equals(_authority, other._authority)
				&& equals(_path, other._path)
				&& equals(_query, other._query)
				&& equals(_fragment, other._fragment);
	}

	// ---------------------------------------------------------- Serialization

	/**
	 * Write the content of this URI.
	 * <p>
	 * Serialization hook. It only delegates to
	 * {@link ObjectOutputStream#defaultWriteObject()}; no additional data is
	 * written.
	 * 
	 * @param oos
	 *            the object-output stream
	 * @throws IOException
	 *             If an IO problem occurs.
	 */
	private void writeObject(ObjectOutputStream oos) throws IOException {

		oos.defaultWriteObject();
	}

	/**
	 * Read a URI.
	 * <p>
	 * Deserialization hook. It only delegates to
	 * {@link ObjectInputStream#defaultReadObject()}; the restored fields are
	 * not validated or post-processed here.
	 * 
	 * @param ois
	 *            the object-input stream
	 * @throws ClassNotFoundException
	 *             If one of the classes specified in the input stream cannot be
	 *             found.
	 * @throws IOException
	 *             If an IO problem occurs.
	 */
	private void readObject(ObjectInputStream ois)
			throws ClassNotFoundException, IOException {

		ois.defaultReadObject();
	}

	// -------------------------------------------------------------- Hash code

	/**
	 * Return a hash code for this URI.
	 * <p>
	 * The value is derived from the characters of the raw URI followed by the
	 * characters of the raw fragment, using the same 31-based polynomial as
	 * <code>String.hashCode()</code>. It is cached in the <code>hash</code>
	 * field, where 0 means "not computed yet". The setters reset the field to
	 * 0 whenever a component changes.
	 * <p>
	 * The computation runs on a local variable and the field is written only
	 * once, with the finished value. Using the field itself as the running
	 * accumulator would let another thread calling this method concurrently
	 * see a non-zero intermediate value and return it as the hash code.
	 * 
	 * @return a hash code value for this URI
	 */
	public int hashCode() {
		int cached = hash;
		if (cached == 0) {
			int computed = 0;
			char[] c = _uri;
			if (c != null) {
				for (int i = 0, len = c.length; i < len; i++) {
					computed = 31 * computed + c[i];
				}
			}
			c = _fragment;
			if (c != null) {
				for (int i = 0, len = c.length; i < len; i++) {
					computed = 31 * computed + c[i];
				}
			}
			// Publish the finished value in a single write.
			hash = computed;
			cached = computed;
		}
		return cached;
	}

	// ------------------------------------------------------------- Comparison

	/**
	 * Compare this URI to another object.
	 * <p>
	 * URIs are ordered by their raw authority first. If the authorities are
	 * equal, they are ordered by their escaped URI string as returned by
	 * {@link #toString()}. A missing (<code>null</code>) authority or URI
	 * string sorts before any present one.
	 * <p>
	 * This method used to return a constant -1 whenever the authorities
	 * differed. Then <code>a.compareTo(b)</code> and
	 * <code>b.compareTo(a)</code> were both negative, which violates the
	 * <code>Comparable</code> contract and breaks sorting and sorted
	 * collections. The result is still non-zero when the authorities differ,
	 * but its sign now depends on the order of the two authorities.
	 * 
	 * @param obj
	 *            the object to be compared; must be a non-null URI
	 * @return 0 if authority and escaped URI are the same, a negative value
	 *         if this URI sorts before <code>obj</code>, a positive value if
	 *         it sorts after
	 * @throws ClassCastException
	 *             not URI argument
	 */
	public int compareTo(Object obj) throws ClassCastException {

		URI another = (URI) obj;
		char[] otherAuthority = another.getRawAuthority();
		if (!equals(_authority, otherAuthority)) {
			// The arrays differ, so this comparison cannot yield 0.
			return compareNullsFirst(
					(_authority == null) ? null : new String(_authority),
					(otherAuthority == null) ? null : new String(
							otherAuthority));
		}
		return compareNullsFirst(toString(), another.toString());
	}

	/**
	 * Compare two strings lexicographically, ordering <code>null</code>
	 * before any non-null string.
	 * 
	 * @param left
	 *            the left-hand string, may be <code>null</code>
	 * @param right
	 *            the right-hand string, may be <code>null</code>
	 * @return 0 if both are <code>null</code> or equal, a negative value if
	 *         <code>left</code> sorts first, a positive value otherwise
	 */
	private static int compareNullsFirst(String left, String right) {
		if (left == null) {
			return (right == null) ? 0 : -1;
		}
		if (right == null) {
			return 1;
		}
		return left.compareTo(right);
	}

	// ------------------------------------------------------------------ Clone

	/**
	 * Create and return a copy of this object, the URI-reference containing the
	 * userinfo component. Notice that the whole URI-reference including the
	 * userinfo component could not be gotten as a <code>String</code>.
	 * <p>
	 * This is the way to obtain an identical <code>URI</code> object that
	 * still includes the userinfo component.
	 * <p>
	 * The copy is created with <code>super.clone()</code>. Every component,
	 * the protocol charset and all state flags are then assigned from this
	 * instance. The component arrays are shared with the original, not
	 * duplicated.
	 * 
	 * @return a clone of this instance
	 * @throws CloneNotSupportedException
	 *             if <code>super.clone()</code> refuses to clone
	 */
	public synchronized Object clone() throws CloneNotSupportedException {

		URI copy = (URI) super.clone();

		// the complete URI and its top-level parts
		copy._uri = _uri;
		copy._scheme = _scheme;
		copy._opaque = _opaque;
		copy._path = _path;
		copy._query = _query;
		copy._fragment = _fragment;

		// the authority and what it is made of
		copy._authority = _authority;
		copy._userinfo = _userinfo;
		copy._host = _host;
		copy._port = _port;

		// the charset to do escape encoding for the copy
		copy.protocolCharset = protocolCharset;

		// flags describing the kind of URI
		copy._is_hier_part = _is_hier_part;
		copy._is_opaque_part = _is_opaque_part;
		copy._is_net_path = _is_net_path;
		copy._is_abs_path = _is_abs_path;
		copy._is_rel_path = _is_rel_path;

		// flags describing the kind of authority and host
		copy._is_reg_name = _is_reg_name;
		copy._is_server = _is_server;
		copy._is_hostname = _is_hostname;
		copy._is_IPv4address = _is_IPv4address;
		copy._is_IPv6reference = _is_IPv6reference;

		return copy;
	}

	// ------------------------------------------------------------ Get the URI

	/**
	 * Return the raw-escaped character sequence of this URI, which is the
	 * form suited to being sent over a protocol.
	 * <p>
	 * It is clearly unwise to use a URL that contains a password which is
	 * intended to be secret. In particular, the use of a password within the
	 * 'userinfo' component of a URL is strongly discouraged except in those
	 * rare cases where the 'password' parameter is intended to be public.
	 * <p>
	 * To obtain the individual parts of the userinfo, use the dedicated
	 * methods of the specific URL class; which ones exist depends on that
	 * class.
	 * 
	 * @return the URI character sequence, or <code>null</code> if none is
	 *         set; this is the internally stored array, not a copy
	 */
	public char[] getRawURI() {
		return this._uri;
	}

	/**
	 * Return the escaped character sequence of this URI as a string, which is
	 * the form suited to being sent over a protocol.
	 * 
	 * @return the escaped URI string, or <code>null</code> if no URI is set
	 */
	public String getEscapedURI() {
		if (_uri == null) {
			return null;
		}
		return new String(_uri);
	}

	/**
	 * Return this URI in unescaped form, decoded with the protocol charset.
	 * 
	 * @return the decoded URI string, or <code>null</code> if no URI is set
	 * @throws URIException
	 *             incomplete trailing escape pattern or unsupported character
	 *             encoding
	 * @see #decode
	 */
	public String getURI() throws URIException {
		if (_uri == null) {
			return null;
		}
		return decode(_uri, getProtocolCharset());
	}

	/**
	 * Return the raw URI reference: the raw URI, a '#', and the raw fragment.
	 * <p>
	 * If there is no fragment, the stored URI array is returned as is. If
	 * there is a fragment but no URI, the stored fragment array is returned
	 * as is, without a leading '#'. Otherwise a new array is built.
	 * 
	 * @return the URI reference character sequence, or <code>null</code> if
	 *         neither URI nor fragment is set
	 */
	public char[] getRawURIReference() {
		if (_fragment == null) {
			return _uri;
		}
		if (_uri == null) {
			return _fragment;
		}
		// Both parts present: lay out <uri> '#' <fragment> in a new array.
		char[] reference = new char[_uri.length + 1 + _fragment.length];
		System.arraycopy(_uri, 0, reference, 0, _uri.length);
		reference[_uri.length] = '#';
		System.arraycopy(_fragment, 0, reference, _uri.length + 1,
				_fragment.length);
		return reference;
	}

	/**
	 * Return the escaped URI reference as a string.
	 * 
	 * @return the escaped URI reference string, or <code>null</code> if
	 *         {@link #getRawURIReference()} yields <code>null</code>
	 */
	public String getEscapedURIReference() {
		char[] rawReference = getRawURIReference();
		if (rawReference == null) {
			return null;
		}
		return new String(rawReference);
	}

	/**
	 * Return the URI reference in unescaped form, decoded with the protocol
	 * charset.
	 * 
	 * @return the decoded URI reference string, or <code>null</code> if
	 *         {@link #getRawURIReference()} yields <code>null</code>
	 * @throws URIException
	 *             If {@link #decode} fails.
	 */
	public String getURIReference() throws URIException {
		char[] rawReference = getRawURIReference();
		if (rawReference == null) {
			return null;
		}
		return decode(rawReference, getProtocolCharset());
	}

	/**
	 * Return the escaped URI string, exactly as {@link #getEscapedURI()}
	 * does.
	 * <p>
	 * In documents, for security reasons, only the URI-reference form without
	 * the userinfo component is used, like http://jakarta.apache.org/. A
	 * URI-reference that does carry a userinfo component can still be parsed.
	 * <p>
	 * In other words, this URI and any of its subclasses must not expose a
	 * URI-reference expression with the userinfo component like
	 * http://user:password@hostport/restricted_zone.<br>
	 * An API client programmer is therefore expected to extract the user and
	 * the password separately. Subclasses will probably support that, but not
	 * as one whole URI-reference expression.
	 * 
	 * @return the escaped URI string, or <code>null</code> if no URI is set
	 * @see #clone()
	 */
	public String toString() {
		String escapedUri = getEscapedURI();
		return escapedUri;
	}

	// ------------------------------------------------------------ Inner class

	/**
	 * Unchecked exception used to alert the user that the default charset has
	 * been changed. It carries a numeric reason code in addition to the
	 * reason text.
	 */
	public static class DefaultCharsetChanged extends RuntimeException {

		// ---------------------------------------------------------- constants

		/** Reason code: no specific reason given. */
		public static final int UNKNOWN = 0;

		/** Reason code: the protocol charset was changed. */
		public static final int PROTOCOL_CHARSET = 1;

		/** Reason code: the document charset was changed. */
		public static final int DOCUMENT_CHARSET = 2;

		// ------------------------------------------------- instance variables

		/** The reason code handed to the constructor. */
		private int reasonCode;

		/** The reason message handed to the constructor. */
		private String reason;

		// ------------------------------------------------------- constructors

		/**
		 * Create the exception from a reason code and a reason text. The text
		 * also becomes the exception message.
		 * 
		 * @param reasonCode
		 *            the reason code, e.g. {@link #PROTOCOL_CHARSET}
		 * @param reason
		 *            the reason
		 */
		public DefaultCharsetChanged(int reasonCode, String reason) {
			super(reason);
			this.reasonCode = reasonCode;
			this.reason = reason;
		}

		// ------------------------------------------------------------ methods

		/**
		 * Return the reason code given at construction.
		 * 
		 * @return the reason code
		 */
		public int getReasonCode() {
			return this.reasonCode;
		}

		/**
		 * Return the reason message given at construction.
		 * 
		 * @return the reason message
		 */
		public String getReason() {
			return this.reason;
		}

	}

	/**
	 * A mapping to determine the (somewhat arbitrarily) preferred charset for a
	 * given locale. Supports all locales recognized in JDK 1.1.
	 * <p>
	 * Distributed by Servlets.com. It was originally written by Jason Hunter
	 * [jhunter at acm.org] and is used with permission.
	 */
	public static class LocaleToCharsetMap {

		/**
		 * A mapping of locale name to charset. Keys are either a bare
		 * language code ("ja") or language and country joined by an
		 * underscore ("zh_TW").
		 */
		private static final Hashtable LOCALE_TO_CHARSET_MAP;
		static {
			LOCALE_TO_CHARSET_MAP = new Hashtable();
			LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
			LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
			LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
			LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
			LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
			// Hebrew: "iw" is the legacy ISO 639 code that older JDKs report
			// from Locale.getLanguage(); newer JDKs report "he". Both map to
			// the same charset so the lookup works on either.
			LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
			LOCALE_TO_CHARSET_MAP.put("he", "ISO-8859-8");
			LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
			LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
			LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
			LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
			LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
			LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
			LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
			LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
			LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
			LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
			LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
			LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
		}

		/**
		 * Get the preferred charset for the given locale.
		 * <p>
		 * The lookup goes from most to least specific: the full locale name
		 * as given by <code>Locale.toString()</code>, then
		 * <code>language_country</code>, then the language alone. The middle
		 * step matters for locales that carry a variant or script. Their full
		 * name (for example "zh_TW_Stroke") matches no key, and without it
		 * they would be resolved by language only and miss a
		 * country-specific entry such as "zh_TW".
		 * 
		 * @param locale
		 *            the locale, must not be <code>null</code>
		 * @return the preferred charset or null if the locale is not
		 *         recognized.
		 */
		public static String getCharset(Locale locale) {
			// try for a full name match (may include country and variant)
			String charset = (String) LOCALE_TO_CHARSET_MAP.get(locale
					.toString());
			if (charset != null) {
				return charset;
			}

			// next, language plus country without variant or script
			String language = locale.getLanguage();
			String country = locale.getCountry();
			if (country.length() > 0) {
				charset = (String) LOCALE_TO_CHARSET_MAP.get(language + "_"
						+ country);
				if (charset != null) {
					return charset;
				}
			}

			// finally, just the language
			charset = (String) LOCALE_TO_CHARSET_MAP.get(language);
			return charset; // may be null
		}

	}

}
